diff --git a/backend/app/api/errors.py b/backend/app/api/errors.py index 7297727e..be4cab7c 100644 --- a/backend/app/api/errors.py +++ b/backend/app/api/errors.py @@ -64,6 +64,11 @@ { # feat_auto_followup_studies Story 1.1 — StudyConfigSpec.auto_followup_depth "AUTO_FOLLOWUP_DEPTH_OUT_OF_RANGE", + # feat_overnight_final_solution Story 1.1 — StudyConfigSpec.auto_followup_strategy + # Covers both the value-rule and pair-rule (depth ≥ 1) failures, plus + # the worker-managed-key reject (auto_followup_visited_template_ids + # / auto_followup_selected_kind set by an operator at create time). + "AUTO_FOLLOWUP_STRATEGY_INVALID", } ) diff --git a/backend/app/api/v1/schemas.py b/backend/app/api/v1/schemas.py index 400fdc04..f2b644a1 100644 --- a/backend/app/api/v1/schemas.py +++ b/backend/app/api/v1/schemas.py @@ -721,6 +721,56 @@ class StudyConfigSpec(BaseModel): carry ``AUTO_FOLLOWUP_DEPTH_OUT_OF_RANGE`` per spec §8.5 (the prefix parser in :mod:`backend.app.api.errors` picks up the ``:`` prefix from the raised ValueError message).""" + auto_followup_strategy: str | None = Field(default=None) + """feat_overnight_final_solution FR-1 + D-13: ``"narrow"`` | ``"follow_suggestions"`` + | ``None`` (treated as ``"narrow"`` by the worker). + + **Field type is ``str | None`` (NOT ``Literal[...]``)** — per spec D-13, + a field-level ``Literal`` would surface bad values as Pydantic's generic + ``VALIDATION_ERROR`` envelope BEFORE the ``mode="after"`` validator + could emit the canonical ``AUTO_FOLLOWUP_STRATEGY_INVALID`` code. Same + pattern as ``auto_followup_depth`` above: enum check + pair rule done + in :meth:`_validate_auto_followup_strategy` via the ``:`` prefix + convention so :func:`backend.app.api.errors.validation_exception_handler` + unwraps the canonical envelope. 
# NOTE: the two validators below are methods of ``StudyConfigSpec``; the
# class header lies outside this excerpt.
@model_validator(mode="before")
@classmethod
def _reject_worker_managed_keys(cls, data: object) -> object:
    """Reject operator-submitted worker-managed JSONB keys (D-14).

    ``auto_followup_visited_template_ids`` and
    ``auto_followup_selected_kind`` are written ONLY by the autopilot
    worker on chain children. Letting the wizard seed them would break the
    single-writer rule for the cycle-guard list and risk spoofed badges on
    the chain panel.

    This runs in ``mode="before"`` so it sees the raw input: with the
    model's ``extra="ignore"`` behaviour an unknown key would otherwise be
    dropped silently before any ``mode="after"`` validator could object.
    ``extra="forbid"`` is deliberately NOT set model-wide, because that
    would also reject any future config key when a stored config is
    re-validated through this model.

    Args:
        data: The raw, un-validated input. Anything that is not a ``dict``
            is passed through untouched.

    Returns:
        ``data`` unchanged when neither worker-managed key is present.

    Raises:
        ValueError: With the ``AUTO_FOLLOWUP_STRATEGY_INVALID:`` prefix,
            naming the first worker-managed key found in ``data``.
    """
    if not isinstance(data, dict):
        return data
    forbidden_keys = (
        "auto_followup_visited_template_ids",
        "auto_followup_selected_kind",
    )
    for key in forbidden_keys:
        if key in data:
            raise ValueError(
                f"AUTO_FOLLOWUP_STRATEGY_INVALID: config.{key} is worker-managed "
                "and may not be set at study creation"
            )
    return data


@model_validator(mode="after")
def _validate_auto_followup_strategy(self) -> StudyConfigSpec:
    """feat_overnight_final_solution FR-1 + D-13: enum + pair check.

    Two rules apply when ``auto_followup_strategy`` is set:

    (a) the value MUST be one of :data:`AUTO_FOLLOWUP_STRATEGY_VALUES`;
    (b) ``auto_followup_depth`` MUST be ``>= 1`` (a strategy choice on a
        study with no follow-up depth is meaningless).

    Both failures raise with the ``AUTO_FOLLOWUP_STRATEGY_INVALID:``
    message prefix. ``None`` is accepted unconditionally.

    Returns:
        ``self``, unchanged, when both rules hold.

    Raises:
        ValueError: When either rule is violated.
    """
    strategy = self.auto_followup_strategy
    if strategy is None:
        return self
    if strategy not in AUTO_FOLLOWUP_STRATEGY_VALUES:
        # Derive the allowed-values text from the source-of-truth tuple so
        # the message cannot drift from the accepted set. For the current
        # tuple this renders exactly "'narrow' or 'follow_suggestions'".
        allowed = " or ".join(repr(value) for value in AUTO_FOLLOWUP_STRATEGY_VALUES)
        raise ValueError(
            "AUTO_FOLLOWUP_STRATEGY_INVALID: config.auto_followup_strategy "
            f"must be {allowed}; got {strategy!r}"
        )
    depth = self.auto_followup_depth
    if depth is None or depth < 1:
        raise ValueError(
            "AUTO_FOLLOWUP_STRATEGY_INVALID: config.auto_followup_strategy "
            "only applies when config.auto_followup_depth >= 1"
        )
    return self


# feat_overnight_final_solution Story 1.1 / D-13 — wire-value source of truth
# for ``StudyConfigSpec.auto_followup_strategy``. Mirrored by the frontend
# ``OVERNIGHT_STRATEGY_VALUES`` in ``ui/src/lib/enums.ts`` and consumed by
# the CI grep gate at ``scripts/ci/verify_enum_source_of_truth.sh``. Keep
# this declaration module-level (NOT inside the class) so the grep gate's
# AST resolver finds the bare tuple assignment.
AUTO_FOLLOWUP_STRATEGY_VALUES: tuple[str, ...] = ("narrow", "follow_suggestions")
Non-optional — every study has a template.""" + selected_followup_kind: Literal["narrow_default", "narrow", "widen", "swap_template"] | None = ( + None + ) + """feat_overnight_final_solution Story 3.1 / FR-6 — the path + :func:`backend.app.workers.auto_followup.enqueue_followup_study` took + when creating this link. ``null`` for the anchor (no parent + follow-up to consume) and for every link created under the legacy + ``"narrow"`` strategy (per D-12 the legacy path persists no + ``auto_followup_selected_kind`` key). The chain endpoint applies a + defensive coercion before populating this field: an unknown JSONB + value in ``studies.config.auto_followup_selected_kind`` (manual DB + INSERT, schema drift) coerces to ``null`` + a + ``chain_selected_kind_unknown`` WARN — never raises a Pydantic + ``ValidationError`` that would 500 the endpoint. Mirrored + character-for-character by ``ui/src/lib/enums.ts SELECTED_FOLLOWUP_KIND_VALUES`` + (Story 3.2).""" class StudyChainResponse(BaseModel): diff --git a/backend/app/api/v1/studies.py b/backend/app/api/v1/studies.py index e1c5bd62..333b499d 100644 --- a/backend/app/api/v1/studies.py +++ b/backend/app/api/v1/studies.py @@ -41,6 +41,7 @@ from datetime import datetime from typing import Annotated, Any +import structlog import uuid_utils from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response, status from pydantic import ValidationError @@ -62,6 +63,9 @@ from backend.app.db import repo from backend.app.db.models import Study from backend.app.db.session import get_db +from backend.app.domain.study.auto_followup_strategy import ( + SELECTED_FOLLOWUP_KIND_VALUES, +) from backend.app.domain.study.chain_summary import ( _direction_normalized_delta_from_prev, compute_cumulative_lift, @@ -84,6 +88,7 @@ ) from backend.app.services.study_preflight import MIN_OVERLAP, probe_judgment_overlap +logger = structlog.get_logger(__name__) router = APIRouter() DEFAULT_PAGE_LIMIT = 50 @@ -863,6 +868,26 @@ async def 
get_study_chain( if not link_entries else _direction_normalized_delta_from_prev(lk.best_metric, prev_metric, link_direction) ) + # feat_overnight_final_solution Story 3.1 / FR-6 — defensive + # coercion for the new selected_followup_kind field. studies.config + # is JSONB with no CHECK; a malformed value (manual INSERT, schema + # drift, future version row read by an older deploy) must NOT + # surface as a Pydantic ValidationError that 500s the endpoint. + # Mirrors the parse_followup_list defensive-ingest contract for + # digests.suggested_followups. Per spec D-12, legacy/default + # chains write no key at all, so the absent case is the COMMON + # path here — only unknown non-None values trigger the WARN. + raw_selected_kind = lk.config.get("auto_followup_selected_kind") + selected_kind: str | None = ( + raw_selected_kind if raw_selected_kind in SELECTED_FOLLOWUP_KIND_VALUES else None + ) + if raw_selected_kind is not None and raw_selected_kind not in SELECTED_FOLLOWUP_KIND_VALUES: + logger.warning( + "chain selected_followup_kind has unknown value; coerced to null", + event_type="chain_selected_kind_unknown", + study_id=lk.id, + raw_value=str(raw_selected_kind)[:64], + ) link_entries.append( StudyChainLink( id=lk.id, @@ -877,6 +902,8 @@ async def get_study_chain( failed_reason=lk.failed_reason, created_at=lk.created_at, completed_at=lk.completed_at, + template_id=lk.template_id, + selected_followup_kind=selected_kind, ) ) prev_metric = lk.best_metric diff --git a/backend/app/domain/study/auto_followup_strategy.py b/backend/app/domain/study/auto_followup_strategy.py new file mode 100644 index 00000000..ed274c5c --- /dev/null +++ b/backend/app/domain/study/auto_followup_strategy.py @@ -0,0 +1,186 @@ +# SPDX-FileCopyrightText: 2026 soundminds.ai +# +# SPDX-License-Identifier: Apache-2.0 + +"""Pure-domain selector for the autopilot's ``follow_suggestions`` strategy. + +Owner: ``feat_overnight_final_solution`` Story 2.1. 
+ +When :data:`backend.app.db.models.study.Study.config` carries +``auto_followup_strategy = "follow_suggestions"``, the autopilot worker +(:mod:`backend.app.workers.auto_followup`) consumes the parent's persisted +digest follow-ups instead of always running the ±50% narrow on the same +template. This module is the pure-domain selector that walks the digest's +``suggested_followups`` list, filters to executable kinds, applies the +cycle guard (no ``swap_template`` whose target is already in +``parent.config.auto_followup_visited_template_ids``), and returns a +:class:`SelectionOutcome` carrying everything the worker needs for both +the dispatch decision AND the telemetry it must emit afterwards +(``source_index``, ``candidate_count``, ``dropped_template_ids``). + +**Pure** — no DB, no I/O, no async. Deterministic: same input → same +output. Unit-testable without fixtures. + +**Always returns a ``SelectionOutcome``** (never ``None``). The +"no executable candidate" case is encoded as ``selected is None`` so the +fallback-event telemetry can still carry ``dropped_template_ids`` for +diagnostics — when every executable item was a ``swap_template`` to an +already-visited template, the operator immediately sees "the chain wanted +to ping-pong but the guard fired" from one log line. + +Spec: ``docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/feature_spec.md`` +(FR-4 + spec FR-3 dispatch + cycle 1 finding C1-A2 + cycle 2 finding C2-A1). +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from backend.app.domain.study.followups import ( + FollowupItem, + NarrowFollowup, + SwapTemplateFollowup, + TextFollowup, + WidenFollowup, +) + +# feat_overnight_final_solution Story 2.1 / FR-6 — wire-value source of +# truth for ``StudyChainLink.selected_followup_kind``. Mirrored by the +# frontend ``SELECTED_FOLLOWUP_KIND_VALUES`` in ``ui/src/lib/enums.ts`` +# (added by Story 3.2). 
Consumed by the CI grep gate at +# ``scripts/ci/verify_enum_source_of_truth.sh``. +# +# ``"narrow_default"`` marks a chain link the worker took via the narrow +# fallback path under the ``follow_suggestions`` strategy — distinct from +# the legacy/default narrow path (which persists NO ``auto_followup_selected_kind`` +# key at all, per D-12). +SELECTED_FOLLOWUP_KIND_VALUES: tuple[str, ...] = ( + "narrow_default", + "narrow", + "widen", + "swap_template", +) + + +@dataclass(frozen=True, slots=True) +class SelectionOutcome: + """The result of :func:`select_executable_followup`. + + ``selected`` is ``None`` when no executable candidate remained after + the cycle-guard filter — the worker dispatches the fallback-to-narrow + path in that case. ``dropped_template_ids`` is **always** populated + with the cycle-guard-dropped ``SwapTemplateFollowup.template_id`` + values (sorted ascending for deterministic telemetry) — even when + ``selected is None``, so the fallback event carries the same + drop-diagnostics as a successful selection. + """ + + selected: FollowupItem | None + """The executable follow-up to dispatch, or ``None`` to fall back.""" + + source_index: int | None + """0-based index of the selected item in the ORIGINAL ``followups`` list + (not in the post-filter list), so telemetry can correlate with the + digest's persisted order. ``None`` when ``selected is None``.""" + + candidate_count: int + """Count of executable items in contention AFTER cycle-guard filtering. + ``0`` when no executable item remained.""" + + dropped_template_ids: list[str] + """Cycle-guard-dropped ``SwapTemplateFollowup.template_id`` values, + sorted ascending. Empty when no swap_template was dropped (e.g. the + digest had only narrow/widen executables, or only text).""" + + +def select_executable_followup( + followups: list[FollowupItem], + visited_template_ids: set[str], +) -> SelectionOutcome: + """Select the top executable follow-up for the autopilot to dispatch. 
+ + Walks ``followups`` once, recording each item's original index. Drops: + + * :class:`~backend.app.domain.study.followups.TextFollowup` items + (no ``search_space`` — nothing to run). + * :class:`~backend.app.domain.study.followups.SwapTemplateFollowup` + items whose ``template_id`` is in ``visited_template_ids`` (the + cycle guard — prevents template ping-pong). + + The first remaining item by original index is the selection. + Relies on the digest's already-ordered list (convergence-aware + ordering per ``prompts/digest_narrative.system.md`` lines 99-121) — + no re-ranking inside the autopilot (D-5). + + The cycle guard is **template-based, NOT search-space-based** (D-9): + a ``narrow`` / ``widen`` that keeps the same template is allowed + even if the parent's template is in the visited set — only + ``swap_template`` items go through the cycle guard, and only against + their ``template_id``. + + The function is **always** total: it returns a :class:`SelectionOutcome` + even when no executable item remains (with ``selected=None`` + + ``source_index=None`` + ``candidate_count=0`` + the dropped IDs). The + worker uses the populated ``dropped_template_ids`` on the fallback + path so the telemetry distinguishes "digest was text-heavy" from + "all executables were cycle-dropped". + + Args: + followups: The parent digest's ``suggested_followups`` list, + already parsed by :func:`backend.app.domain.study.followups.parse_followup_list`. + May be empty. + visited_template_ids: Templates already visited in this chain, + constructed by the worker from + ``parent.config.get("auto_followup_visited_template_ids", [parent.template_id])``. + The worker does NOT add the prospective child template + BEFORE calling — the cycle guard's job is to look backward + only (D-9). + + Returns: + A :class:`SelectionOutcome` describing the selection (or + absence thereof) plus telemetry fields. Never raises; + deterministic (same input → same output). 
+ """ + dropped_template_ids: list[str] = [] + # Executable candidates that survived BOTH filters, with their + # original index recorded for the source_index telemetry field. + candidates: list[tuple[int, FollowupItem]] = [] + + for original_index, item in enumerate(followups): + # Drop text — no search_space to consume. + if isinstance(item, TextFollowup): + continue + # Cycle guard: swap_template to a visited template is dropped. + if isinstance(item, SwapTemplateFollowup) and item.template_id in visited_template_ids: + dropped_template_ids.append(item.template_id) + continue + # narrow / widen / non-cycled swap_template are all executable. + if isinstance(item, (NarrowFollowup, WidenFollowup, SwapTemplateFollowup)): + candidates.append((original_index, item)) + + dropped_template_ids.sort() + + if not candidates: + return SelectionOutcome( + selected=None, + source_index=None, + candidate_count=0, + dropped_template_ids=dropped_template_ids, + ) + + # First executable item by original index — trust the digest's + # convergence-aware ordering (D-5). 
+ source_index, selected = candidates[0] + return SelectionOutcome( + selected=selected, + source_index=source_index, + candidate_count=len(candidates), + dropped_template_ids=dropped_template_ids, + ) + + +__all__ = [ + "SELECTED_FOLLOWUP_KIND_VALUES", + "SelectionOutcome", + "select_executable_followup", +] diff --git a/backend/tests/contract/test_studies_api_contract.py b/backend/tests/contract/test_studies_api_contract.py index 4a5b0087..d748a305 100644 --- a/backend/tests/contract/test_studies_api_contract.py +++ b/backend/tests/contract/test_studies_api_contract.py @@ -302,6 +302,81 @@ def test_study_config_coerces_string_depth_per_pydantic_v2() -> None: assert cfg.auto_followup_depth == 3 +# --------------------------------------------------------------------------- +# feat_overnight_final_solution Story 1.1 — StudyConfigSpec.auto_followup_strategy +# --------------------------------------------------------------------------- + + +def test_study_config_accepts_none_auto_followup_strategy() -> None: + """FR-1: None (or missing key) is the wire default — treated as 'narrow' + by the worker. No depth requirement when strategy is None.""" + cfg = StudyConfigSpec(max_trials=20) + assert cfg.auto_followup_strategy is None + + +@pytest.mark.parametrize("strategy", ["narrow", "follow_suggestions"]) +def test_study_config_accepts_valid_auto_followup_strategy(strategy: str) -> None: + """FR-1: both wire values are accepted when paired with depth >= 1.""" + cfg = StudyConfigSpec(max_trials=20, auto_followup_depth=3, auto_followup_strategy=strategy) + assert cfg.auto_followup_strategy == strategy + + +def test_study_config_rejects_unknown_auto_followup_strategy_value() -> None: + """FR-1 + D-13 value-rule: a non-allowed value raises ValidationError + carrying the AUTO_FOLLOWUP_STRATEGY_INVALID prefix that the error + handler unwraps. 
AC-2.""" + with pytest.raises(ValidationError, match="AUTO_FOLLOWUP_STRATEGY_INVALID"): + StudyConfigSpec( + max_trials=20, auto_followup_depth=3, auto_followup_strategy="broaden_everything" + ) + + +def test_study_config_rejects_strategy_without_depth() -> None: + """FR-1 pair-rule: strategy set but depth is None raises with the + AUTO_FOLLOWUP_STRATEGY_INVALID prefix. AC-1.""" + with pytest.raises(ValidationError, match="AUTO_FOLLOWUP_STRATEGY_INVALID"): + StudyConfigSpec(max_trials=20, auto_followup_strategy="follow_suggestions") + + +def test_study_config_rejects_strategy_with_depth_zero() -> None: + """FR-1 pair-rule: strategy set but depth==0 (the worker-internal + terminal value) raises — the operator-facing rule is depth >= 1.""" + with pytest.raises(ValidationError, match="AUTO_FOLLOWUP_STRATEGY_INVALID"): + StudyConfigSpec( + max_trials=20, auto_followup_depth=0, auto_followup_strategy="follow_suggestions" + ) + + +def test_study_config_rejects_operator_submitted_visited_template_ids() -> None: + """D-14: ``auto_followup_visited_template_ids`` is worker-managed — + operators cannot seed it at study creation. The ``mode='before'`` + validator catches it before Pydantic's default ``extra='ignore'`` would + silently drop it. AC-D14 / single-writer rule.""" + with pytest.raises(ValidationError, match="AUTO_FOLLOWUP_STRATEGY_INVALID"): + StudyConfigSpec.model_validate( + { + "max_trials": 20, + "auto_followup_depth": 3, + "auto_followup_strategy": "follow_suggestions", + "auto_followup_visited_template_ids": ["TEMPLATE_A"], + } + ) + + +def test_study_config_rejects_operator_submitted_selected_kind() -> None: + """D-14: ``auto_followup_selected_kind`` is per-link worker-managed + state. 
Same single-writer rule.""" + with pytest.raises(ValidationError, match="AUTO_FOLLOWUP_STRATEGY_INVALID"): + StudyConfigSpec.model_validate( + { + "max_trials": 20, + "auto_followup_depth": 3, + "auto_followup_strategy": "follow_suggestions", + "auto_followup_selected_kind": "swap_template", + } + ) + + def test_objective_spec_rejects_invalid_k() -> None: with pytest.raises(ValidationError): ObjectiveSpec(metric="ndcg", k=7) diff --git a/backend/tests/contract/test_studies_chain_contract.py b/backend/tests/contract/test_studies_chain_contract.py index cbfb2aaa..45b68cc6 100644 --- a/backend/tests/contract/test_studies_chain_contract.py +++ b/backend/tests/contract/test_studies_chain_contract.py @@ -33,7 +33,13 @@ def test_chain_response_top_level_keys() -> None: } -def test_chain_link_twelve_fields() -> None: +def test_chain_link_fourteen_fields() -> None: + """feat_overnight_final_solution Story 3.1 — two additive fields: + ``template_id`` (P1-B5; needed by the chain panel's swap_template + badge to resolve the target template's display name) and + ``selected_followup_kind`` (FR-6; soft-contract additive Literal). + Both are additive — older clients still parse the response. 
+ Field count rises 12 → 14.""" assert set(StudyChainLink.model_fields) == { "id", "name", @@ -47,8 +53,37 @@ def test_chain_link_twelve_fields() -> None: "failed_reason", "created_at", "completed_at", + "template_id", + "selected_followup_kind", } - assert len(StudyChainLink.model_fields) == 12 + assert len(StudyChainLink.model_fields) == 14 + + +def test_chain_link_selected_followup_kind_is_literal_with_four_values() -> None: + """feat_overnight_final_solution Story 3.1 / FR-6 — the Literal + on the response model must mirror SELECTED_FOLLOWUP_KIND_VALUES + character-for-character so the frontend mirror in enums.ts and + this contract cannot silently drift.""" + from backend.app.domain.study.auto_followup_strategy import ( + SELECTED_FOLLOWUP_KIND_VALUES, + ) + + annotation = StudyChainLink.model_fields["selected_followup_kind"].annotation + args = typing.get_args(annotation) + # Optional[Literal[...]] renders as Union[Literal[...], None]; the + # Literal is the non-None entry. + literal_arg = next(a for a in args if a is not type(None)) + assert set(typing.get_args(literal_arg)) == set(SELECTED_FOLLOWUP_KIND_VALUES) + + +def test_chain_link_template_id_is_required_string() -> None: + """feat_overnight_final_solution Story 3.1 / P1-B5 — every study + has a template_id, so this field is non-optional. 
The chain panel's + swap_template badge depends on it for the per-link template-name + fetch (Story 3.2).""" + info = StudyChainLink.model_fields["template_id"] + assert info.annotation is str + assert info.is_required() def test_stop_reason_literal_matches_frozenset() -> None: diff --git a/backend/tests/integration/test_auto_followup_strategy.py b/backend/tests/integration/test_auto_followup_strategy.py new file mode 100644 index 00000000..61185e64 --- /dev/null +++ b/backend/tests/integration/test_auto_followup_strategy.py @@ -0,0 +1,659 @@ +# SPDX-FileCopyrightText: 2026 soundminds.ai +# +# SPDX-License-Identifier: Apache-2.0 + +"""Integration tests for ``follow_suggestions`` strategy dispatch (Story 2.2). + +Real Postgres + real Redis. Covers the eight worker-level assertions from +the plan's Story 2.2 DoD: AC-3 (legacy byte-identical), AC-6 (narrow +consumed), AC-7 (swap branches template_id), AC-8 (cycle guard → widen + +dropped_template_ids in telemetry), AC-9 (fallback on text-only), AC-10 +(strategy inherited verbatim), AC-17 (deleted swap target → WARN + +fallback), AC-18 (no parent-kind leak), plus the P1-B4 exception +fallback. Companion to ``test_auto_followup.py`` (the legacy-path tests), +which MUST continue passing unmodified (backward-compat gate, FR-3). +""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest +from redis.asyncio import Redis + +from backend.app.core.settings import get_settings +from backend.app.db import repo +from backend.app.db.session import get_session_factory +from backend.app.llm.budget_gate import daily_key +from backend.tests.conftest import postgres_reachable + +pytestmark = [ + pytest.mark.integration, + pytest.mark.skipif( + not postgres_reachable(), + reason="Postgres not reachable — see docs/03_runbooks/local-dev.md", + ), +] + + +# A small valid SearchSpace dict reused across digest fixtures. 
+_VALID_SEARCH_SPACE_DICT: dict[str, Any] = { + "params": {"title_boost": {"type": "float", "low": 0.5, "high": 2.0}}, +} + + +async def _seed_parent_with_digest( + *, + strategy: str | None, + auto_followup_depth: int | None = 3, + digest_followups: list[dict[str, Any]] | None = None, + visited_template_ids: list[str] | None = None, + parent_selected_kind: str | None = None, + extra_template_ids: int = 0, +) -> dict[str, str]: + """Seed the chain: cluster + parent template + (optional) extra + swap-target templates + query_set + judgment_list + parent study + + 20 complete trials + (optional) digest row. + + Args: + strategy: ``auto_followup_strategy`` to set on parent's config. + ``None`` writes no key (legacy path). + auto_followup_depth: parent depth. + digest_followups: ``suggested_followups`` JSONB array — if + ``None``, no digest row is created at all (tests that + exercise the missing-digest defensive path). + visited_template_ids: pre-existing visited-list to seed on + ``parent.config`` (for the AC-8 cycle-guard test). + parent_selected_kind: pre-existing ``auto_followup_selected_kind`` + to seed on parent (for the AC-18 stale-leak test). + extra_template_ids: number of additional swap-target query + templates to seed (for the swap_template tests). Their ids + are returned under ``extra_template_ids`` key. 
+ """ + suffix = uuid.uuid4().hex[:8] + factory = get_session_factory() + async with factory() as db: + cluster = await repo.create_cluster( + db, + id=str(uuid.uuid4()), + name=f"af-strat-cluster-{suffix}", + engine_type="elasticsearch", + environment="dev", + base_url="http://stub:9200", + auth_kind="es_basic", + credentials_ref="ref", + ) + template = await repo.create_query_template( + db, + id=str(uuid.uuid4()), + name=f"af-strat-tmpl-{suffix}", + engine_type="elasticsearch", + body='{"query": {"match": {"body": "{{ query }}"}}}', + declared_params={"title_boost": "float"}, + version=1, + ) + extra_ids: list[str] = [] + for i in range(extra_template_ids): + extra = await repo.create_query_template( + db, + id=str(uuid.uuid4()), + name=f"af-strat-extra-{i}-{suffix}", + engine_type="elasticsearch", + body='{"query": {"match": {"body": "{{ query }}"}}}', + declared_params={"title_boost": "float"}, + version=1, + ) + extra_ids.append(extra.id) + query_set = await repo.create_query_set( + db, + id=str(uuid.uuid4()), + name=f"af-strat-qs-{suffix}", + cluster_id=cluster.id, + ) + await repo.create_query( + db, + id=str(uuid.uuid4()), + query_set_id=query_set.id, + query_text="q1", + ) + jl = await repo.create_judgment_list( + db, + id=str(uuid.uuid4()), + name=f"af-strat-jl-{suffix}", + description=None, + query_set_id=query_set.id, + cluster_id=cluster.id, + target="stub-index", + current_template_id=template.id, + rubric="r", + status="complete", + failed_reason=None, + calibration=None, + ) + + parent_id = str(uuid.uuid4()) + config: dict[str, Any] = {"max_trials": 20} + if auto_followup_depth is not None: + config["auto_followup_depth"] = auto_followup_depth + if strategy is not None: + config["auto_followup_strategy"] = strategy + if visited_template_ids is not None: + config["auto_followup_visited_template_ids"] = list(visited_template_ids) + if parent_selected_kind is not None: + config["auto_followup_selected_kind"] = parent_selected_kind + parent = await 
repo.create_study( + db, + id=parent_id, + name=f"af-strat-parent-{suffix}", + cluster_id=cluster.id, + target="stub-index", + template_id=template.id, + query_set_id=query_set.id, + judgment_list_id=jl.id, + search_space={"params": {"title_boost": {"type": "float", "low": 0.5, "high": 5.0}}}, + objective={"metric": "ndcg", "k": 10, "direction": "maximize"}, + config=config, + status="completed", + optuna_study_name=parent_id, + ) + + # 20 complete trials — first decile (first 2) at metric=0.30, + # rest at 0.40. parent.best_metric=0.50 ⇒ lift=0.20 > epsilon. + best_trial_id: str | None = None + for i in range(20): + metric = 0.30 if i < 2 else 0.40 + tid = str(uuid.uuid4()) + await repo.create_trial( + db, + id=tid, + study_id=parent.id, + optuna_trial_number=i, + params={"title_boost": 2.0 + (i / 100)}, + primary_metric=metric, + metrics={"ndcg@10": metric}, + duration_ms=10, + status="complete", + error=None, + started_at=datetime.now(UTC), + ended_at=datetime.now(UTC), + ) + if i == 19: + best_trial_id = tid + + from backend.app.services.study_state import _GUARD_KEY + + db.sync_session.info[_GUARD_KEY] = True + try: + parent.best_metric = 0.50 + parent.best_trial_id = best_trial_id + await db.flush() + finally: + db.sync_session.info.pop(_GUARD_KEY, None) + + if digest_followups is not None: + await repo.create_digest( + db, + id=str(uuid.uuid4()), + study_id=parent.id, + narrative="seeded digest for strategy dispatch tests", + parameter_importance={"title_boost": 1.0}, + recommended_config={"title_boost": 1.5}, + suggested_followups=digest_followups, + generated_by="local:test-fixture", + ) + + await db.commit() + + return { + "parent_id": parent_id, + "cluster_id": cluster.id, + "template_id": template.id, + "query_set_id": query_set.id, + "judgment_list_id": jl.id, + # Comma-joined for keep-it-flat dict access; tests split when needed. 
+ "extra_template_ids": ",".join(extra_ids), + } + + +async def _clear_budget_key() -> None: + settings = get_settings() + redis = Redis.from_url(settings.redis_url, decode_responses=False) + try: + await redis.delete(daily_key(datetime.now(UTC))) + finally: + await redis.aclose() + + +def _make_arq_ctx() -> tuple[dict[str, Any], MagicMock]: + arq_pool = MagicMock() + arq_pool.enqueue_job = AsyncMock(return_value=None) + ctx: dict[str, Any] = {"arq_pool": arq_pool} + return ctx, arq_pool + + +async def _get_child(parent_id: str) -> Any: + factory = get_session_factory() + async with factory() as db: + children = await repo.list_children_of_study(db, parent_id) + assert len(children) == 1, f"expected exactly 1 child, got {len(children)}" + return children[0] + + +# --------------------------------------------------------------------------- +# AC-3 — legacy/default path: no strategy key → no new config keys on child +# (byte-identical to pre-feature behavior). +# --------------------------------------------------------------------------- + + +async def test_ac3_legacy_path_persists_no_new_keys() -> None: + """Per FR-3 + AC-3 + D-12: a parent with NO ``auto_followup_strategy`` + key produces a child whose ``config`` contains NEITHER + ``auto_followup_selected_kind`` NOR ``auto_followup_visited_template_ids``. 
+ Backward-compat gate — also verified by ``test_auto_followup.py`` + passing unmodified.""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + seeded = await _seed_parent_with_digest(strategy=None, digest_followups=None) + ctx, _ = _make_arq_ctx() + + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.template_id == seeded["template_id"] # same template + assert "auto_followup_selected_kind" not in child.config + assert "auto_followup_visited_template_ids" not in child.config + assert "auto_followup_strategy" not in child.config # inherited (was None) + assert child.config["auto_followup_depth"] == 2 # decremented + + +# --------------------------------------------------------------------------- +# AC-6 — follow_suggestions consumes top-narrow follow-up +# --------------------------------------------------------------------------- + + +async def test_ac6_follow_suggestions_narrow_consumed() -> None: + """Top executable is a `narrow` → child uses its search_space verbatim, + keeps parent.template_id, persists selected_kind="narrow" and the + visited list at [parent.template_id] (no growth since template + unchanged — D-12 ordered-unique).""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + digest_followups=[ + { + "kind": "narrow", + "rationale": "narrow around the winner", + "search_space": _VALID_SEARCH_SPACE_DICT, + }, + {"kind": "text", "rationale": "ignored", "search_space": None}, + ], + ) + ctx, _ = _make_arq_ctx() + + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.template_id == seeded["template_id"] # narrow keeps parent template + assert child.config["auto_followup_selected_kind"] == "narrow" + assert 
child.config["auto_followup_visited_template_ids"] == [seeded["template_id"]] + assert child.config["auto_followup_strategy"] == "follow_suggestions" # AC-10 + # The child's search_space mirrors the follow-up's bounds (the + # follow-up's SearchSpace serializes through model_dump() which adds + # type/log defaults — compare structural fields instead of full + # dict equality). + assert set(child.search_space["params"].keys()) == {"title_boost"} + assert child.search_space["params"]["title_boost"]["low"] == 0.5 + assert child.search_space["params"]["title_boost"]["high"] == 2.0 + + +# --------------------------------------------------------------------------- +# AC-7 — swap_template branches the child's template_id +# --------------------------------------------------------------------------- + + +async def test_ac7_follow_suggestions_swap_template_branches_template_id() -> None: + """Top executable is a `swap_template` → child.template_id = swap + target, search_space from the follow-up verbatim, visited list grows + to [parent.template_id, swap_target_template_id].""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + # digest_followups=None — skip helper's empty-digest creation so this + # test can create the digest with the swap target's id (known only + # after the helper returns). + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + extra_template_ids=1, + digest_followups=None, + ) + swap_target_id = seeded["extra_template_ids"].split(",")[0] + # Now create the digest with the swap target's real id. 
+ factory = get_session_factory() + async with factory() as db: + await repo.create_digest( + db, + id=str(uuid.uuid4()), + study_id=seeded["parent_id"], + narrative="seeded digest", + parameter_importance={"title_boost": 1.0}, + recommended_config={"title_boost": 1.5}, + suggested_followups=[ + { + "kind": "swap_template", + "rationale": "function-score template is a better fit", + "template_id": swap_target_id, + "search_space": _VALID_SEARCH_SPACE_DICT, + } + ], + generated_by="local:test-fixture", + ) + await db.commit() + + ctx, _ = _make_arq_ctx() + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.template_id == swap_target_id + assert child.template_id != seeded["template_id"] # branched away from parent + assert child.config["auto_followup_selected_kind"] == "swap_template" + assert child.config["auto_followup_visited_template_ids"] == [ + seeded["template_id"], + swap_target_id, + ] + # Same model_dump-defaults normalization as AC-6. + assert set(child.search_space["params"].keys()) == {"title_boost"} + assert child.search_space["params"]["title_boost"]["low"] == 0.5 + assert child.search_space["params"]["title_boost"]["high"] == 2.0 + + +# --------------------------------------------------------------------------- +# AC-9 — fallback to narrow when digest has only text follow-ups +# --------------------------------------------------------------------------- + + +async def test_ac9_text_only_digest_falls_back_to_narrow_default() -> None: + """`follow_suggestions` strategy + text-only digest → narrow fallback, + child.template_id stays at parent.template_id, selected_kind = + "narrow_default". 
Chain does NOT stall.""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + digest_followups=[ + {"kind": "text", "rationale": "re-run with bigger budget", "search_space": None}, + {"kind": "text", "rationale": "investigate category X", "search_space": None}, + ], + ) + ctx, _ = _make_arq_ctx() + + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.template_id == seeded["template_id"] + assert child.config["auto_followup_selected_kind"] == "narrow_default" + assert child.config["auto_followup_visited_template_ids"] == [seeded["template_id"]] + assert child.config["auto_followup_strategy"] == "follow_suggestions" + + +# --------------------------------------------------------------------------- +# AC-8 — cycle guard drops swap-to-visited; widen selected; dropped recorded +# --------------------------------------------------------------------------- + + +async def test_ac8_cycle_guard_drops_swap_to_visited_and_selects_widen() -> None: + """Worker-level coverage per P1-B3. Parent's visited list pre-populated + with the swap target's id (simulating a multi-link chain that already + visited template B). Digest emits both a swap-to-B (dropped) and a + widen. Child runs the widen; visited list stays the same (widen keeps + parent template).""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + # Seed with extra templates + pre-populated visited list including both. + # digest_followups=None — skip helper's digest so this test creates + # the digest with the swap target's id below (avoids + # digests_study_id_key UNIQUE violation). 
+ seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + extra_template_ids=1, + visited_template_ids=None, # set below once we know the extra id + digest_followups=None, + ) + swap_target_id = seeded["extra_template_ids"].split(",")[0] + # Pre-populate the parent's visited list to include the swap target + # AND the parent's template. + factory = get_session_factory() + async with factory() as db: + # Re-fetch parent and update its config in-place. + parent = await repo.get_study(db, seeded["parent_id"]) + assert parent is not None + new_config = dict(parent.config) + new_config["auto_followup_visited_template_ids"] = [seeded["template_id"], swap_target_id] + parent.config = new_config + await db.flush() + await repo.create_digest( + db, + id=str(uuid.uuid4()), + study_id=seeded["parent_id"], + narrative="seeded digest", + parameter_importance={"title_boost": 1.0}, + recommended_config={"title_boost": 1.5}, + suggested_followups=[ + { + "kind": "swap_template", + "rationale": "to already-visited", + "template_id": swap_target_id, + "search_space": _VALID_SEARCH_SPACE_DICT, + }, + { + "kind": "widen", + "rationale": "widen kept on the same template", + "search_space": _VALID_SEARCH_SPACE_DICT, + }, + ], + generated_by="local:test-fixture", + ) + await db.commit() + + ctx, _ = _make_arq_ctx() + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + # Widen kept the same template, but the visited list was inherited + # verbatim (parent already had the swap target in there). + assert child.template_id == seeded["template_id"] + assert child.config["auto_followup_selected_kind"] == "widen" + # Inherited ordered-unique: [parent, swap_target]; adding parent again + # via the ordered-unique dedup keeps it at length 2. 
+ assert child.config["auto_followup_visited_template_ids"] == [ + seeded["template_id"], + swap_target_id, + ] + + +# --------------------------------------------------------------------------- +# AC-10 — strategy inherited verbatim down the chain +# (subsumed by AC-6/AC-7/AC-9 assertions on child.config.auto_followup_strategy) +# --------------------------------------------------------------------------- + + +async def test_ac10_strategy_inherited_verbatim() -> None: + """Parent on follow_suggestions → child also has + ``auto_followup_strategy == "follow_suggestions"``. The child's own + autopilot will dispatch the same branch when its digest lands.""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + digest_followups=[ + { + "kind": "narrow", + "rationale": "any executable", + "search_space": _VALID_SEARCH_SPACE_DICT, + } + ], + ) + ctx, _ = _make_arq_ctx() + await enqueue_followup_study(ctx, seeded["parent_id"]) + child = await _get_child(seeded["parent_id"]) + assert child.config["auto_followup_strategy"] == "follow_suggestions" + + +# --------------------------------------------------------------------------- +# AC-17 — deleted swap target → WARN + fallback +# --------------------------------------------------------------------------- + + +async def test_ac17_deleted_swap_target_falls_back_to_narrow() -> None: + """Digest points at a template_id that doesn't exist (deleted between + persist + dispatch). Worker logs WARN with event_type + ``auto_followup_swap_target_missing`` and falls back to narrow on + parent.template_id (selected_kind = "narrow_default"). + + Uses ``structlog.testing.capture_logs`` — the worker emits via + ``structlog.get_logger`` directly so pytest's caplog (which captures + stdlib logging records) doesn't see these events. 
Mirrors the + existing ``test_enqueue_emits_auto_followup_enqueued_event`` pattern + in ``test_auto_followup.py``.""" + import structlog.testing + + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + fake_template_id = str(uuid.uuid4()) # never created in DB + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + digest_followups=[ + { + "kind": "swap_template", + "rationale": "swap to deleted", + "template_id": fake_template_id, + "search_space": _VALID_SEARCH_SPACE_DICT, + }, + ], + ) + ctx, _ = _make_arq_ctx() + + with structlog.testing.capture_logs() as captured: + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.template_id == seeded["template_id"] # fell back + assert child.config["auto_followup_selected_kind"] == "narrow_default" + event_types = [e.get("event_type") for e in captured] + assert "auto_followup_swap_target_missing" in event_types + + +# --------------------------------------------------------------------------- +# AC-18 — parent's stale auto_followup_selected_kind does NOT leak to child +# --------------------------------------------------------------------------- + + +async def test_ac18_legacy_path_pops_inherited_selected_kind() -> None: + """Defensive contract per AC-18. A parent that happens to carry + ``auto_followup_selected_kind = "widen"`` on its config (e.g. it was + itself a chain-link) — but is on the legacy (no-strategy / "narrow") + path — must produce a child whose config does NOT carry that key at + all. 
The worker pops the inherited value before INSERT.""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + seeded = await _seed_parent_with_digest( + strategy=None, # legacy path + parent_selected_kind="widen", # stale inherited value + digest_followups=None, + ) + ctx, _ = _make_arq_ctx() + + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert "auto_followup_selected_kind" not in child.config + + +async def test_ac18_follow_suggestions_overwrites_stale_parent_kind() -> None: + """Same defensive contract on the follow_suggestions path: even + if parent carries a stale ``"widen"``, the child reflects the + selection THIS worker invocation made (here: ``"narrow"`` from the + digest's first executable), NOT the inherited value.""" + from backend.workers.auto_followup import enqueue_followup_study + + await _clear_budget_key() + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + parent_selected_kind="widen", + digest_followups=[ + { + "kind": "narrow", + "rationale": "narrow", + "search_space": _VALID_SEARCH_SPACE_DICT, + } + ], + ) + ctx, _ = _make_arq_ctx() + + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.config["auto_followup_selected_kind"] == "narrow" + + +# --------------------------------------------------------------------------- +# P1-B4 — unexpected error in dispatch → defensive fallback + WARN +# --------------------------------------------------------------------------- + + +async def test_exception_in_follow_suggestions_dispatch_falls_back_to_narrow( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Force a synthetic exception inside the follow_suggestions dispatch + block (by monkeypatching ``select_executable_followup`` to raise). 
The + worker must catch it, emit the ``auto_followup_strategy_dispatch_error`` + WARN, and create the child on the legacy narrow path. Chain reliability + MUST NOT regress vs the legacy path (spec §13 Reliability + P1-B4). + + Uses ``structlog.testing.capture_logs`` per the same pattern as + ``test_ac17_deleted_swap_target_falls_back_to_narrow`` — the worker + emits via structlog directly so pytest's caplog doesn't see these + events.""" + import structlog.testing + + from backend.workers import auto_followup as worker_module + from backend.workers.auto_followup import enqueue_followup_study + + def boom(*_args: Any, **_kwargs: Any) -> Any: + raise RuntimeError("synthetic failure for the defensive fallback test") + + monkeypatch.setattr(worker_module, "select_executable_followup", boom) + + await _clear_budget_key() + seeded = await _seed_parent_with_digest( + strategy="follow_suggestions", + digest_followups=[ + { + "kind": "narrow", + "rationale": "would-be selection", + "search_space": _VALID_SEARCH_SPACE_DICT, + }, + ], + ) + ctx, _ = _make_arq_ctx() + + # Should NOT raise — the worker swallows + falls back. + with structlog.testing.capture_logs() as captured: + await enqueue_followup_study(ctx, seeded["parent_id"]) + + child = await _get_child(seeded["parent_id"]) + assert child.template_id == seeded["template_id"] + # Per D-12: fallback under follow_suggestions persists "narrow_default". 
+ assert child.config["auto_followup_selected_kind"] == "narrow_default" + event_types = [e.get("event_type") for e in captured] + assert "auto_followup_strategy_dispatch_error" in event_types diff --git a/backend/tests/unit/api/test_validation_error_handler.py b/backend/tests/unit/api/test_validation_error_handler.py index 60c85baa..ea359989 100644 --- a/backend/tests/unit/api/test_validation_error_handler.py +++ b/backend/tests/unit/api/test_validation_error_handler.py @@ -66,6 +66,69 @@ def test_auto_followup_depth_emits_canonical_error_code() -> None: assert detail["retryable"] is False +def test_auto_followup_strategy_value_emits_canonical_error_code() -> None: + """feat_overnight_final_solution Story 1.1 — bad strategy VALUE → envelope + ``error_code=AUTO_FOLLOWUP_STRATEGY_INVALID``. AC-2.""" + try: + StudyConfigSpec( + max_trials=20, auto_followup_depth=3, auto_followup_strategy="broaden_everything" + ) + except ValidationError as e: + body = _run_handler(RequestValidationError(e.errors())) + else: + raise AssertionError("StudyConfigSpec did not raise on unknown strategy value") + + assert body["__status__"] == 422 + detail = body["detail"] + assert isinstance(detail, dict) + assert detail["error_code"] == "AUTO_FOLLOWUP_STRATEGY_INVALID" + assert "narrow" in detail["message"] and "follow_suggestions" in detail["message"] + + +def test_auto_followup_strategy_pair_rule_emits_canonical_error_code() -> None: + """feat_overnight_final_solution Story 1.1 — strategy set without + depth >= 1 → ``AUTO_FOLLOWUP_STRATEGY_INVALID``. 
AC-1.""" + try: + StudyConfigSpec(max_trials=20, auto_followup_strategy="follow_suggestions") + except ValidationError as e: + body = _run_handler(RequestValidationError(e.errors())) + else: + raise AssertionError("StudyConfigSpec did not raise on pair-rule violation") + + assert body["__status__"] == 422 + detail = body["detail"] + assert isinstance(detail, dict) + assert detail["error_code"] == "AUTO_FOLLOWUP_STRATEGY_INVALID" + assert "auto_followup_depth" in detail["message"] + + +def test_auto_followup_strategy_visited_list_reject_emits_canonical_error_code() -> None: + """D-14: operator-submitted ``auto_followup_visited_template_ids`` + → ``AUTO_FOLLOWUP_STRATEGY_INVALID``. The ``mode='before'`` validator + fires BEFORE Pydantic's default ``extra='ignore'`` would silently drop + the key — confirming the single-writer rule is enforced at the wire + contract.""" + try: + StudyConfigSpec.model_validate( + { + "max_trials": 20, + "auto_followup_depth": 3, + "auto_followup_strategy": "follow_suggestions", + "auto_followup_visited_template_ids": ["TEMPLATE_A"], + } + ) + except ValidationError as e: + body = _run_handler(RequestValidationError(e.errors())) + else: + raise AssertionError("StudyConfigSpec did not raise on operator-submitted visited list") + + assert body["__status__"] == 422 + detail = body["detail"] + assert isinstance(detail, dict) + assert detail["error_code"] == "AUTO_FOLLOWUP_STRATEGY_INVALID" + assert "worker-managed" in detail["message"] + + def test_non_prefixed_validation_error_falls_back_to_generic_envelope() -> None: """Regression guard (cycle-2 finding C2-1): a Pydantic validator that raises ValueError WITHOUT a recognized prefix (e.g., the existing diff --git a/backend/tests/unit/domain/study/test_auto_followup_strategy.py b/backend/tests/unit/domain/study/test_auto_followup_strategy.py new file mode 100644 index 00000000..207903b2 --- /dev/null +++ b/backend/tests/unit/domain/study/test_auto_followup_strategy.py @@ -0,0 +1,450 @@ +# 
SPDX-FileCopyrightText: 2026 soundminds.ai +# +# SPDX-License-Identifier: Apache-2.0 + +"""Unit tests for :func:`select_executable_followup` (Story 2.1). + +Pure-domain selector. No DB, no fixtures, no I/O. Mirrors the test layout +of ``test_followups_backcompat.py`` — same shared search-space dict so a +single Pydantic-validated payload powers every case. + +Coverage matrix per spec §14 Unit tests + plan Story 2.1 DoD list: + +* Empty list → ``selected=None``, ``candidate_count=0``, + ``dropped_template_ids=[]``. +* Text-only list → same. +* Single narrow → narrow selected at source_index=0, candidate_count=1, + dropped empty. +* Text + narrow (text first) → narrow selected at source_index=1 + (original index preserved, NOT post-filter index — telemetry contract). +* swap to visited template + widen → widen selected, swap recorded in + ``dropped_template_ids`` (AC-8 selector half). +* swap to non-visited template → swap selected (AC-7 selector half). +* All-swaps-cycle-dropped (only swaps, all visited) → ``selected=None`` + with non-empty ``dropped_template_ids`` (AC-9 selector half — the + fallback event still carries cycle-guard diagnostics). +* Multiple executable candidates of different kinds → first-by-original- + index wins (D-5: trust digest ordering, no kind-preference policy). +* Determinism property: same input → same output (run twice, equal). +* ``dropped_template_ids`` is sorted ascending (deterministic telemetry). +""" + +from __future__ import annotations + +from backend.app.domain.study.auto_followup_strategy import ( + SELECTED_FOLLOWUP_KIND_VALUES, + SelectionOutcome, + select_executable_followup, +) +from backend.app.domain.study.followups import ( + FollowupItem, + FollowupListAdapter, + NarrowFollowup, + SwapTemplateFollowup, + WidenFollowup, +) + +# Reused across cases — a single small but valid search space keeps the +# fixtures cheap. 
# (Pydantic-validated via FollowupListAdapter so the test
# inputs match the contract the selector consumes from parse_followup_list.)
_VALID_SEARCH_SPACE = {
    "params": {
        "title_boost": {"type": "float", "low": 0.5, "high": 2.0},
    },
}

# Three 36-char template_ids (the SwapTemplateFollowup field requires exact-
# 36-char strings — UUIDs). Deterministic patterns are used instead of
# uuid.uuid4() so the tests stay readable and order-stable.
TEMPLATE_A = "aaaaaaaa-aaaa-7aaa-8aaa-aaaaaaaaaaaa"
TEMPLATE_B = "bbbbbbbb-bbbb-7bbb-8bbb-bbbbbbbbbbbb"
TEMPLATE_C = "cccccccc-cccc-7ccc-8ccc-cccccccccccc"


def _build(items: list[dict[str, object]]) -> list[FollowupItem]:
    """Validate raw follow-up dicts by passing them through ``FollowupListAdapter``.

    Every test feeds the selector the adapter's output rather than raw
    dicts, so the inputs are always Pydantic-validated.
    """
    validated = FollowupListAdapter.validate_python(items)
    return validated


# ---------------------------------------------------------------------------
# Empty + text-only cases — no executable candidate available.
# ---------------------------------------------------------------------------


class TestEmptyAndTextOnly:
    """Inputs with nothing executable must yield an empty selection outcome."""

    def test_empty_list_returns_no_selection(self) -> None:
        # Spell out the full expected outcome so a single equality check
        # covers every field at once.
        expected = SelectionOutcome(
            selected=None,
            source_index=None,
            candidate_count=0,
            dropped_template_ids=[],
        )
        result = select_executable_followup([], visited_template_ids=set())
        assert result == expected

    def test_text_only_list_returns_no_selection(self) -> None:
        text_items: list[dict[str, object]] = [
            {"kind": "text", "rationale": rationale, "search_space": None}
            for rationale in (
                "re-run with bigger budget",
                "investigate query category X",
            )
        ]
        result = select_executable_followup(_build(text_items), visited_template_ids=set())
        assert result.selected is None
        assert result.source_index is None
        assert result.candidate_count == 0
        assert result.dropped_template_ids == []


# ---------------------------------------------------------------------------
# Single-kind executable cases.
# ---------------------------------------------------------------------------


class TestSingleKindSelection:
    """A lone executable follow-up is the selection, whichever kind it is."""

    def test_single_narrow_is_selected(self) -> None:
        narrow_item: dict[str, object] = {
            "kind": "narrow",
            "rationale": "narrow around the winner",
            "search_space": _VALID_SEARCH_SPACE,
        }
        result = select_executable_followup(_build([narrow_item]), visited_template_ids=set())
        assert isinstance(result.selected, NarrowFollowup)
        assert result.source_index == 0
        assert result.candidate_count == 1
        assert result.dropped_template_ids == []

    def test_single_widen_is_selected(self) -> None:
        widen_item: dict[str, object] = {
            "kind": "widen",
            "rationale": "winner hit upper edge",
            "search_space": _VALID_SEARCH_SPACE,
        }
        result = select_executable_followup(_build([widen_item]), visited_template_ids=set())
        assert isinstance(result.selected, WidenFollowup)
        assert result.source_index == 0
        assert result.candidate_count == 1

    def test_single_swap_to_non_visited_is_selected(self) -> None:
        """AC-7 selector half — a swap whose target template is not in the
        visited set is selected as-is."""
        swap_item: dict[str, object] = {
            "kind": "swap_template",
            "rationale": "function-score template is a better fit",
            "template_id": TEMPLATE_B,
            "search_space": _VALID_SEARCH_SPACE,
        }
        # Only TEMPLATE_A has been visited, so the swap to TEMPLATE_B is allowed.
        result = select_executable_followup(
            _build([swap_item]),
            visited_template_ids={TEMPLATE_A},
        )
        assert isinstance(result.selected, SwapTemplateFollowup)
        assert result.selected.template_id == TEMPLATE_B
        assert result.source_index == 0
        assert result.candidate_count == 1
        assert result.dropped_template_ids == []


# ---------------------------------------------------------------------------
# Original-index preservation — the `source_index` telemetry field must
# point at the ORIGINAL position in the input list, not the post-filter
# position (D-4: telemetry contract is correlation-friendly with the
# digest's persisted order).
# ---------------------------------------------------------------------------


class TestOriginalIndexPreservation:
    """``source_index`` reports the position in the unfiltered input list."""

    def test_text_then_narrow_selects_narrow_at_original_index_one(self) -> None:
        items: list[dict[str, object]] = [
            {"kind": "text", "rationale": "first text", "search_space": None},
            {
                "kind": "narrow",
                "rationale": "second is the runnable one",
                "search_space": _VALID_SEARCH_SPACE,
            },
        ]
        result = select_executable_followup(_build(items), visited_template_ids=set())
        assert isinstance(result.selected, NarrowFollowup)
        assert result.source_index == 1  # original index, NOT 0
        assert result.candidate_count == 1

    def test_three_texts_then_widen_selects_widen_at_original_index_three(self) -> None:
        # Three non-executable text entries (t0, t1, t2) occupy indices 0-2,
        # pushing the widen to original index 3.
        items: list[dict[str, object]] = [
            {"kind": "text", "rationale": f"t{position}", "search_space": None}
            for position in range(3)
        ]
        items.append(
            {
                "kind": "widen",
                "rationale": "the widen one",
                "search_space": _VALID_SEARCH_SPACE,
            }
        )
        result = select_executable_followup(_build(items), visited_template_ids=set())
        assert isinstance(result.selected, WidenFollowup)
        assert result.source_index == 3
        assert result.candidate_count == 1


# ---------------------------------------------------------------------------
# Cycle-guard cases — swap_template filtering against the visited set.
+# --------------------------------------------------------------------------- + + +class TestCycleGuard: + def test_swap_to_visited_template_is_dropped(self) -> None: + """The single executable is a swap to a visited template — it + gets dropped, and the outcome is no-selection with the dropped + id recorded for the fallback telemetry event (AC-9 selector + half).""" + followups = _build( + [ + { + "kind": "swap_template", + "rationale": "swap to A", + "template_id": TEMPLATE_A, + "search_space": _VALID_SEARCH_SPACE, + } + ] + ) + outcome = select_executable_followup(followups, visited_template_ids={TEMPLATE_A}) + assert outcome.selected is None + assert outcome.source_index is None + assert outcome.candidate_count == 0 + assert outcome.dropped_template_ids == [TEMPLATE_A] + + def test_swap_to_visited_plus_widen_selects_widen_and_records_drop(self) -> None: + """AC-8 selector half — swap to a visited template is dropped; + the next executable (a widen) is selected; the dropped template + id is recorded on the outcome for the strategy-selected + telemetry event.""" + followups = _build( + [ + { + "kind": "swap_template", + "rationale": "swap to already-visited A", + "template_id": TEMPLATE_A, + "search_space": _VALID_SEARCH_SPACE, + }, + { + "kind": "widen", + "rationale": "widen kept on the same template", + "search_space": _VALID_SEARCH_SPACE, + }, + ] + ) + outcome = select_executable_followup(followups, visited_template_ids={TEMPLATE_A}) + assert isinstance(outcome.selected, WidenFollowup) + assert outcome.source_index == 1 + assert outcome.candidate_count == 1 + assert outcome.dropped_template_ids == [TEMPLATE_A] + + def test_all_swaps_to_visited_templates_returns_no_selection_with_drops( + self, + ) -> None: + """All executable candidates are swaps to visited templates — + the chain wanted to ping-pong; the cycle guard fired on every + one. 
Worker dispatches the fallback path; the fallback event + carries the dropped ids.""" + followups = _build( + [ + { + "kind": "swap_template", + "rationale": "to A", + "template_id": TEMPLATE_A, + "search_space": _VALID_SEARCH_SPACE, + }, + { + "kind": "swap_template", + "rationale": "to B", + "template_id": TEMPLATE_B, + "search_space": _VALID_SEARCH_SPACE, + }, + ] + ) + outcome = select_executable_followup( + followups, visited_template_ids={TEMPLATE_A, TEMPLATE_B} + ) + assert outcome.selected is None + assert outcome.candidate_count == 0 + assert outcome.dropped_template_ids == [TEMPLATE_A, TEMPLATE_B] + + def test_narrow_keeps_same_template_not_subject_to_cycle_guard(self) -> None: + """D-9 — the cycle guard is template-based AND swap-only. A + `narrow` on the visited template is a legitimate continuation of + the chain (the digest is suggesting tighter bounds), so it must + be selected even though `parent.template_id` is in the visited + set.""" + followups = _build( + [ + { + "kind": "narrow", + "rationale": "tighter bounds on the same template", + "search_space": _VALID_SEARCH_SPACE, + } + ] + ) + outcome = select_executable_followup( + followups, + visited_template_ids={TEMPLATE_A}, # whatever this template is + ) + assert isinstance(outcome.selected, NarrowFollowup) + assert outcome.dropped_template_ids == [] + + +# --------------------------------------------------------------------------- +# Multi-kind: first-by-original-index wins (D-5 — trust digest ordering). +# --------------------------------------------------------------------------- + + +class TestFirstByOriginalIndexWins: + def test_widen_before_narrow_selects_widen(self) -> None: + """No kind-preference policy. 
The digest's convergence-aware + ordering at the prompt layer puts the recommended kind first; + the autopilot trusts that order without re-ranking.""" + followups = _build( + [ + {"kind": "widen", "rationale": "first", "search_space": _VALID_SEARCH_SPACE}, + {"kind": "narrow", "rationale": "second", "search_space": _VALID_SEARCH_SPACE}, + ] + ) + outcome = select_executable_followup(followups, visited_template_ids=set()) + assert isinstance(outcome.selected, WidenFollowup) + assert outcome.source_index == 0 + assert outcome.candidate_count == 2 + + def test_swap_before_narrow_selects_swap(self) -> None: + followups = _build( + [ + { + "kind": "swap_template", + "rationale": "first — swap suggested", + "template_id": TEMPLATE_B, + "search_space": _VALID_SEARCH_SPACE, + }, + {"kind": "narrow", "rationale": "second", "search_space": _VALID_SEARCH_SPACE}, + ] + ) + outcome = select_executable_followup(followups, visited_template_ids={TEMPLATE_A}) + assert isinstance(outcome.selected, SwapTemplateFollowup) + assert outcome.selected.template_id == TEMPLATE_B + assert outcome.source_index == 0 + assert outcome.candidate_count == 2 + + def test_text_swap_visited_narrow_picks_narrow_records_swap_drop(self) -> None: + """Mixed: text (drop), swap-to-visited (cycle-drop + recorded), + narrow (selected). source_index points at the narrow's original + index (2). 
The single dropped swap survives in the outcome's + telemetry list.""" + followups = _build( + [ + {"kind": "text", "rationale": "t0", "search_space": None}, + { + "kind": "swap_template", + "rationale": "swap to visited", + "template_id": TEMPLATE_A, + "search_space": _VALID_SEARCH_SPACE, + }, + { + "kind": "narrow", + "rationale": "the survivor", + "search_space": _VALID_SEARCH_SPACE, + }, + ] + ) + outcome = select_executable_followup(followups, visited_template_ids={TEMPLATE_A}) + assert isinstance(outcome.selected, NarrowFollowup) + assert outcome.source_index == 2 + assert outcome.candidate_count == 1 + assert outcome.dropped_template_ids == [TEMPLATE_A] + + +# --------------------------------------------------------------------------- +# Determinism property + dropped_template_ids ordering invariant. +# --------------------------------------------------------------------------- + + +class TestDeterminismAndOrdering: + def test_same_input_returns_equal_outcome_twice(self) -> None: + """Pure-function contract — selector is deterministic. Same input + produces equal output across any number of calls (the worker + retries the same job after a transient failure must produce the + same selection).""" + followups = _build( + [ + { + "kind": "swap_template", + "rationale": "to A", + "template_id": TEMPLATE_A, + "search_space": _VALID_SEARCH_SPACE, + }, + { + "kind": "narrow", + "rationale": "narrow fallback", + "search_space": _VALID_SEARCH_SPACE, + }, + ] + ) + first = select_executable_followup(followups, visited_template_ids={TEMPLATE_A}) + second = select_executable_followup(followups, visited_template_ids={TEMPLATE_A}) + assert first == second + + def test_dropped_template_ids_is_sorted_ascending(self) -> None: + """Deterministic telemetry: when multiple swap_templates are + cycle-dropped, the ``dropped_template_ids`` field on the + outcome is sorted ascending. 
Stops the test suite from being + flaky against arbitrary digest order, and gives runbooks a + stable grep target.""" + followups = _build( + [ + { + "kind": "swap_template", + "rationale": "to C", + "template_id": TEMPLATE_C, + "search_space": _VALID_SEARCH_SPACE, + }, + { + "kind": "swap_template", + "rationale": "to A", + "template_id": TEMPLATE_A, + "search_space": _VALID_SEARCH_SPACE, + }, + { + "kind": "swap_template", + "rationale": "to B", + "template_id": TEMPLATE_B, + "search_space": _VALID_SEARCH_SPACE, + }, + ] + ) + outcome = select_executable_followup( + followups, visited_template_ids={TEMPLATE_A, TEMPLATE_B, TEMPLATE_C} + ) + assert outcome.selected is None + # Ascending — A < B < C — regardless of input order. + assert outcome.dropped_template_ids == [TEMPLATE_A, TEMPLATE_B, TEMPLATE_C] + + +# --------------------------------------------------------------------------- +# SELECTED_FOLLOWUP_KIND_VALUES — wire-value source-of-truth lock. +# --------------------------------------------------------------------------- + + +def test_selected_followup_kind_values_are_canonical() -> None: + """Frontend mirror in ``ui/src/lib/enums.ts SELECTED_FOLLOWUP_KIND_VALUES`` + (Story 3.2) MUST match this tuple character-for-character + order.""" + assert SELECTED_FOLLOWUP_KIND_VALUES == ( + "narrow_default", + "narrow", + "widen", + "swap_template", + ) diff --git a/backend/workers/auto_followup.py b/backend/workers/auto_followup.py index 68edb0d2..d32b3ce8 100644 --- a/backend/workers/auto_followup.py +++ b/backend/workers/auto_followup.py @@ -51,6 +51,16 @@ ChainGateDecision, evaluate_chain_gate, ) +from backend.app.domain.study.auto_followup_strategy import ( + SelectionOutcome, + select_executable_followup, +) +from backend.app.domain.study.followups import ( + NarrowFollowup, + SwapTemplateFollowup, + WidenFollowup, + parse_followup_list, +) from backend.app.domain.study.search_space_defaults import ( build_starter_search_space, narrow_bounds_around_winner, @@ 
-214,13 +224,173 @@ async def enqueue_followup_study(ctx: dict[str, Any], parent_study_id: str) -> N bracket=0.5, ) + # 7b. feat_overnight_final_solution Story 2.2 — strategy dispatch. + # When parent.config.auto_followup_strategy == "follow_suggestions" + # the autopilot consumes the parent's digest follow-ups instead of + # always running the same ±50% narrow. Default / "narrow" / missing + # keep today's exact behavior (byte-identical legacy path). + strategy = parent.config.get("auto_followup_strategy") + # State that may be overridden by the follow_suggestions branch. + # Defaults mirror the legacy narrow path so a clean fallback works. + child_template_id: str = parent.template_id + selection_outcome: SelectionOutcome | None = None # populated only under follow_suggestions + swap_target_missing = False # tracked for the post-INSERT telemetry suppression + # Captured INSIDE the db-context block so the post-commit + # telemetry can use them without re-querying the closed session + # (P2 verification finding — the `async with factory() as db:` + # block ends at the commit; later code runs against a closed + # session). + digest_followup_kinds: list[str] = [] + visited_template_id_count: int = 0 + + if strategy == "follow_suggestions": + # Wrap the whole follow_suggestions block in a defensive + # try/except per spec §13 Reliability + P1-B4: any unexpected + # error in digest read / parse / select MUST be caught and + # fall back to today's narrow path with a WARN. Chain + # reliability MUST NOT regress vs the legacy path. + try: + digest = await repo.get_digest_for_study(db, parent.id) + # F2 (GPT-5.5 final review): a missing digest under + # follow_suggestions is the defensive edge case spec FR-3 + # flagged — the digest worker normally enqueues this + # worker AFTER persisting, so a None here means manual + # digest deletion / persistence drift. 
WARN with the + # distinct event_type so operators can grep this case + # apart from the routine text-only-digest fallback. + if digest is None: + logger.warning( + "auto_followup follow_suggestions: parent digest missing", + event_type="auto_followup_strategy_digest_missing", + parent_study_id=parent.id, + ) + raw_followups = digest.suggested_followups if digest else [] + followups = parse_followup_list(raw_followups, study_id=parent.id) + # Capture diagnostics for the post-commit telemetry. + digest_followup_kinds = [f.kind for f in followups] + # Anchor's missing key is treated as [anchor.template_id] + # per D-14 (single-writer rule). The worker is the sole + # writer of this list. + parent_visited_list: list[str] = parent.config.get( + "auto_followup_visited_template_ids", + [parent.template_id], + ) + visited_template_id_count = len(parent_visited_list) + visited_template_ids: set[str] = set(parent_visited_list) + selection_outcome = select_executable_followup(followups, visited_template_ids) + + sel = selection_outcome.selected + if isinstance(sel, (NarrowFollowup, WidenFollowup)): + # Consume the follow-up's search_space verbatim + # (already validated by parse_followup_list + the + # digest worker's structured-output schema). Keep + # parent.template_id — narrow/widen never branch + # the template. + child_space = sel.search_space + narrowed_names = sorted(sel.search_space.params.keys()) + # child_template_id stays at parent.template_id + elif isinstance(sel, SwapTemplateFollowup): + # Defensive: the swap target may have been hard- + # deleted between digest persist and now (AC-17). + # On miss → WARN + fall through to the narrow + # fallback (the swap_template event consumes the + # missing-target slot; the no-executable event is + # NOT also emitted per FR-8). 
+ swap_template = await repo.get_query_template(db, sel.template_id) + if swap_template is None: + # WARN emitted BEFORE fallback so it carries + # parent_study_id only (no child_study_id — + # the worker has not yet INSERTed the + # fallback child). + logger.warning( + "auto_followup swap_template target template missing", + event_type="auto_followup_swap_target_missing", + parent_study_id=parent.id, + swap_target_template_id=sel.template_id, + ) + swap_target_missing = True + # Fall through to narrow fallback (defaults + # for child_template_id / child_space are + # already the legacy narrow values from + # step 7 above). + else: + # Use the swap target's id; consume the + # follow-up's search_space verbatim — the + # digest worker called + # remap_search_space_for_swap_target before + # persisting so the bounds are already + # validated against the swap target's + # declared_params. + child_template_id = sel.template_id + child_space = sel.search_space + narrowed_names = sorted(sel.search_space.params.keys()) + # else: outcome.selected is None → fall through to + # narrow fallback (child_template_id / child_space + # already at legacy defaults). The telemetry on the + # fallback event still carries + # outcome.dropped_template_ids so a chain that wanted + # to ping-pong but was guard-dropped is observable on + # the same line. + except Exception as exc: # noqa: BLE001 — defensive fallback + # Spec §13 Reliability — any unexpected failure in the + # follow_suggestions dispatch must degrade to the + # legacy narrow path; chain reliability MUST NOT + # regress vs pre-feature. + logger.warning( + "auto_followup follow_suggestions dispatch failed; falling back to narrow", + event_type="auto_followup_strategy_dispatch_error", + parent_study_id=parent.id, + error=str(exc)[:200], + ) + selection_outcome = None # treat as "no selection" → fallback + # child_template_id + child_space remain at legacy + # narrow defaults from step 7 above. + # 8. 
Build the child config with the depth counter decremented. # FR-5 strict inheritance: every other key propagates verbatim. # Use .get() defensively in case parent.config was serialized with # exclude_none=True (Gemini Code Assist review, PR #223). parent_depth: int = parent.config.get("auto_followup_depth", 0) remaining = parent_depth - 1 - child_config = {**parent.config, "auto_followup_depth": remaining} + child_config: dict[str, Any] = {**parent.config, "auto_followup_depth": remaining} + + # Per FR-3 / AC-18: child must NEVER inherit the parent's + # auto_followup_selected_kind — it's per-link state recording the + # path THIS worker invocation took. Pop unconditionally; the + # follow_suggestions branch below re-sets the right value. + child_config.pop("auto_followup_selected_kind", None) + + if strategy == "follow_suggestions": + # Persist the cycle-guard state (ordered-unique visited list) + # and the per-link selected_kind. Per D-12, these keys are + # ONLY persisted under "follow_suggestions" — the legacy + # path stays clean. + parent_visited_raw = parent.config.get( + "auto_followup_visited_template_ids", + [parent.template_id], + ) + # Ordered-unique via list(dict.fromkeys(...)) per FR-5 + AC-6: + # when child_template_id == parent.template_id (narrow/widen + # kept the same template, OR fell back to narrow), the list + # does not grow. 
+ child_config["auto_followup_visited_template_ids"] = list( + dict.fromkeys([*parent_visited_raw, child_template_id]) + ) + sel = selection_outcome.selected if selection_outcome is not None else None + if isinstance(sel, NarrowFollowup): + child_config["auto_followup_selected_kind"] = "narrow" + elif isinstance(sel, WidenFollowup): + child_config["auto_followup_selected_kind"] = "widen" + elif isinstance(sel, SwapTemplateFollowup) and not swap_target_missing: + child_config["auto_followup_selected_kind"] = "swap_template" + else: + # No executable selected, OR swap target missing → the + # follow_suggestions fallback-to-narrow path. Per D-12 + # this DOES persist "narrow_default" (operator picked + # follow_suggestions but the autopilot had nothing + # executable to run; the "refined" badge on the chain + # panel is the audit signal). + child_config["auto_followup_selected_kind"] = "narrow_default" # 9. Build child name + persist via repo. The repo.create_study # call sets status='queued' (default for new studies); the @@ -235,7 +405,7 @@ async def enqueue_followup_study(ctx: dict[str, Any], parent_study_id: str) -> N name=child_name, cluster_id=parent.cluster_id, target=parent.target, - template_id=parent.template_id, + template_id=child_template_id, query_set_id=parent.query_set_id, judgment_list_id=parent.judgment_list_id, search_space=child_space.model_dump(), @@ -281,3 +451,47 @@ async def enqueue_followup_study(ctx: dict[str, Any], parent_study_id: str) -> N epsilon=outcome.epsilon, narrowed_param_names=narrowed_names, ) + + # 12. feat_overnight_final_solution Story 2.2 — strategy telemetry. + # Emitted AFTER child INSERT/commit so child_study_id is populated. + # Only under "follow_suggestions"; legacy/narrow stays log-quiet. 
+ if strategy == "follow_suggestions" and selection_outcome is not None: + sel = selection_outcome.selected + if isinstance(sel, (NarrowFollowup, WidenFollowup, SwapTemplateFollowup)): + # Suppress when swap target was missing — the swap_target_missing + # WARN already covered that case (FR-8: distinct event shape). + if not (isinstance(sel, SwapTemplateFollowup) and swap_target_missing): + logger.info( + "auto_followup strategy selected an executable follow-up", + event_type="auto_followup_strategy_selected", + parent_study_id=parent_study_id, + child_study_id=child_id, + strategy="follow_suggestions", + selected_kind=child_config["auto_followup_selected_kind"], + source_index=selection_outcome.source_index, + candidate_count=selection_outcome.candidate_count, + dropped_template_ids=selection_outcome.dropped_template_ids, + ) + else: + # sel is None → no executable candidate (text-only digest OR + # all-swaps-cycle-dropped). Fallback to narrow took the + # `narrow_default` path. The fallback event carries the + # dropped ids so the operator sees the ping-pong-vs-text + # distinction on one line. Uses the diagnostic locals + # captured inside the db-context block above. + logger.info( + "auto_followup no executable candidate; fell back to narrow", + event_type="auto_followup_no_executable_candidate_fell_back_to_narrow", + parent_study_id=parent_study_id, + child_study_id=child_id, + digest_followup_kinds=digest_followup_kinds, + visited_template_id_count=visited_template_id_count, + dropped_template_ids=selection_outcome.dropped_template_ids, + ) + elif strategy == "follow_suggestions" and selection_outcome is None: + # The defensive try/except caught an unexpected error in the + # dispatch block. We already emitted the + # auto_followup_strategy_dispatch_error WARN inside the except; + # the child was still created on the narrow fallback path. No + # additional INFO event — the WARN is the audit signal. 
+ pass diff --git a/docs/00_overview/BACKLOG_DASHBOARD.md b/docs/00_overview/BACKLOG_DASHBOARD.md index c4770360..4db02d84 100644 --- a/docs/00_overview/BACKLOG_DASHBOARD.md +++ b/docs/00_overview/BACKLOG_DASHBOARD.md @@ -2,7 +2,7 @@ # RelyLoop BACKLOG Dashboard -_Reflects feature-folder state as of **2026-06-03** (latest mtime of any planned/implemented feature `.md` file). Regenerated by `make dashboard` and the `mvp1-dashboard-regen` pre-commit hook. For the rich local view (filter chips, type colors), open [`backlog_dashboard.html`](backlog_dashboard.html) in a browser._ +_Reflects feature-folder state as of **2026-06-04** (latest mtime of any planned/implemented feature `.md` file). Regenerated by `make dashboard` and the `mvp1-dashboard-regen` pre-commit hook. For the rich local view (filter chips, type colors), open [`backlog_dashboard.html`](backlog_dashboard.html) in a browser._ ## Next up diff --git a/docs/00_overview/DASHBOARD.md b/docs/00_overview/DASHBOARD.md index 247bee12..7f7199d9 100644 --- a/docs/00_overview/DASHBOARD.md +++ b/docs/00_overview/DASHBOARD.md @@ -1,13 +1,13 @@ # RelyLoop — Release Roadmap -_Top-level index across MVP1 → GA v1+ as of **2026-06-03**. Click a release name to drill into the per-release dashboard. Theme labels sourced from [`docs/01_architecture/tech-stack.md` §"Canonical release matrix"](../01_architecture/tech-stack.md). For the rich local view, open [`dashboard.html`](dashboard.html) in a browser._ +_Top-level index across MVP1 → GA v1+ as of **2026-06-04**. Click a release name to drill into the per-release dashboard. Theme labels sourced from [`docs/01_architecture/tech-stack.md` §"Canonical release matrix"](../01_architecture/tech-stack.md). 
For the rich local view, open [`dashboard.html`](dashboard.html) in a browser._ ## Releases | Release | Theme | Progress | Status | |---|---|---|---| | [MVP1 / v0.1](MVP1_DASHBOARD.md) | The Loop | 94 / 94 scoped done | **Complete** | -| [MVP2 / v0.2](MVP2_DASHBOARD.md) | Three-Engine + Real Signals | 14 / 24 scoped done · 24 remaining | **In progress** | +| [MVP2 / v0.2](MVP2_DASHBOARD.md) | Three-Engine + Real Signals | 14 / 25 scoped done · 26 remaining | **In progress** | | MVP3 / v0.3 | Observable | — | **Not yet scoped** | | GA v1 / v1.0 | Production-ready | — | **Not yet scoped** | diff --git a/docs/00_overview/MVP1_DASHBOARD.md b/docs/00_overview/MVP1_DASHBOARD.md index 0831cefc..80e3affe 100644 --- a/docs/00_overview/MVP1_DASHBOARD.md +++ b/docs/00_overview/MVP1_DASHBOARD.md @@ -2,7 +2,7 @@ # RelyLoop MVP1 Dashboard -_Reflects feature-folder state as of **2026-06-03** (latest mtime of any planned/implemented feature `.md` file). Regenerated by `make dashboard` and the `mvp1-dashboard-regen` pre-commit hook. For the rich local view (filter chips, type colors), open [`mvp1_dashboard.html`](mvp1_dashboard.html) in a browser._ +_Reflects feature-folder state as of **2026-06-04** (latest mtime of any planned/implemented feature `.md` file). Regenerated by `make dashboard` and the `mvp1-dashboard-regen` pre-commit hook. For the rich local view (filter chips, type colors), open [`mvp1_dashboard.html`](mvp1_dashboard.html) in a browser._ ## Next up diff --git a/docs/00_overview/MVP2_DASHBOARD.md b/docs/00_overview/MVP2_DASHBOARD.md index 41796377..8af39f0b 100644 --- a/docs/00_overview/MVP2_DASHBOARD.md +++ b/docs/00_overview/MVP2_DASHBOARD.md @@ -2,7 +2,7 @@ # RelyLoop MVP2 Dashboard -_Reflects feature-folder state as of **2026-06-03** (latest mtime of any planned/implemented feature `.md` file). Regenerated by `make dashboard` and the `mvp1-dashboard-regen` pre-commit hook. 
For the rich local view (filter chips, type colors), open [`mvp2_dashboard.html`](mvp2_dashboard.html) in a browser._ +_Reflects feature-folder state as of **2026-06-04** (latest mtime of any planned/implemented feature `.md` file). Regenerated by `make dashboard` and the `mvp1-dashboard-regen` pre-commit hook. For the rich local view (filter chips, type colors), open [`mvp2_dashboard.html`](mvp2_dashboard.html) in a browser._ ## Next up @@ -20,16 +20,16 @@ Plan approved; run /impl-execute to ship | Metric | Value | |---|---| -| Filed under MVP2 | **43** folders total (done + specced not-done + idea backlog + bugs) | -| Specced features done | **14 / 24** (58%) — of features *past the idea stage* (those with a spec); the idea backlog below is NOT in this denominator, so 100% ≠ release complete | -| Pending work | **27** items (every not-done feat/infra/chore/bug across all priorities) | +| Filed under MVP2 | **46** folders total (done + specced not-done + idea backlog + bugs) | +| Specced features done | **14 / 25** (56%) — of features *past the idea stage* (those with a spec); the idea backlog below is NOT in this denominator, so 100% ≠ release complete | +| Pending work | **30** items (every not-done feat/infra/chore/bug across all priorities) | | → P0 — do next | **0** unblocking / paying daily cost | -| → P1 | **0** high-value, ready when P0 clears | -| → P2 (default) | 23 important to file, not blocking | +| → P1 | **1** high-value, ready when P0 clears | +| → P2 (default) | 25 important to file, not blocking | | → Backlog | 4 captured for record, not planned | | Open bugs | 9 | -| Legacy "Path to MVP2" | 24 items — scoped-not-done + bugs + chore-ideas only (excludes feat/infra ideas) | -| Backlog ideas | 3 idea-only feat/infra (not yet scoped into MVP2) | +| Legacy "Path to MVP2" | 26 items — scoped-not-done + bugs + chore-ideas only (excludes feat/infra ideas) | +| Backlog ideas | 4 idea-only feat/infra (not yet scoped into MVP2) | | In flight | 0 feature(s) 
actively shipping | ## Pipeline @@ -59,46 +59,49 @@ Plan approved; run /impl-execute to ship _None._ -### Plan (12) +### Plan (13) | # | Priority | Feature | Type | One-liner | Depends on | Status | |---|---|---|---|---|---|---| -| 1 | P2 | [feat_apply_path_normalizer_declaration](planned_features/02_mvp2/feat_apply_path_normalizer_declaration/feature_spec.md) | Feature | The winning normalizer ships as a **structured, language-agnostic manifest** in the config-repo PR — not just prose. | — | — | -| 2 | P2 | [feat_overnight_studies_summary_card](planned_features/02_mvp2/feat_overnight_studies_summary_card/feature_spec.md) | Feature | A "ran while you were away" card surfaces at the top of `/studies` when at least one overnight chain has completed since the operator's last visit. | — | [PR #343](https://github.com/SoundMindsAI/relyloop/pull/343) | -| 3 | P2 | [feat_query_normalization_tuning](planned_features/02_mvp2/feat_query_normalization_tuning/feature_spec.md) | Feature | A template that opts in by declaring `query_normalizer` as a Categorical param gets the Optuna loop deciding empirically — on the operator's judgment set — whether lowercasing, trimming, or contractio | — | — | -| 4 | P2 | [feat_query_normalizer_typed_pipeline](planned_features/02_mvp2/feat_query_normalizer_typed_pipeline/feature_spec.md) | Feature | A new typed search-space member `NormalizerPipelineParam` lets a template declare an **ordered list of normalization steps**; the Optuna loop samples over the powerset of declared steps and proposes t | — | — | -| 5 | P2 | [feat_ubi_llm_study_comparison](planned_features/02_mvp2/feat_ubi_llm_study_comparison/feature_spec.md) | Feature | A single dedicated route `/studies/compare?a={id}&b={id}` renders the two studies side-by-side with a per-panel diff column: a sentence-level digest-narrative diff, a best-trial parameter table with s | — | [PR #320](https://github.com/SoundMindsAI/relyloop/pull/320) | -| 6 | P2 | 
[chore_arq_pool_aclose_deprecation](planned_features/02_mvp2/chore_arq_pool_aclose_deprecation/feature_spec.md) | Chore | Both call sites use `await arq_pool.aclose()`; no `DeprecationWarning` on shutdown; a regression guard asserts the async-correct form on both paths so a future edit cannot silently reintroduce `close( | — | — | -| 7 | P2 | [chore_cluster_detail_rung_badge](planned_features/02_mvp2/chore_cluster_detail_rung_badge/feature_spec.md) | Chore | The cluster-detail page surfaces a `` for the cluster, scoped by a user-selected (or auto-seeded) query set + target. | — | [PR #320](https://github.com/SoundMindsAI/relyloop/pull/320) | -| 8 | P2 | [chore_demo_seeding_integration_tests_rewrite](planned_features/02_mvp2/chore_demo_seeding_integration_tests_rewrite/feature_spec.md) | Chore | The 9 skipped cases are rewritten to the async "POST + poll-until-terminal" shape, the timeout case is re-homed to the worker layer, a new `AC-Async` case asserts the `running → complete` polling tran | — | [PR #286](https://github.com/SoundMindsAI/relyloop/pull/286) | -| 9 | P2 | [chore_studies_post_arq_spy_fixture](planned_features/02_mvp2/chore_studies_post_arq_spy_fixture/feature_spec.md) | Chore | A reusable `arq_pool_spy` integration fixture that records every `enqueue_job(name, *args)` call, letting studies-POST tests positively assert `spy.calls == []` on rejection and `spy.calls == [("start | — | — | -| 10 | P2 | [chore_ubi_reader_search_after_pagination](planned_features/02_mvp2/chore_ubi_reader_search_after_pagination/feature_spec.md) | Chore | A new engine-neutral `SearchAdapter.scan_all` cursor-scan lets `UbiReader` iterate the **entire** matching event/query stream for a window (subject to a caller ceiling), folding each page into the agg | — | [PR #413](https://github.com/SoundMindsAI/relyloop/pull/413) | -| 11 | P2 | [bug_baseline_phase_test_isolation](planned_features/02_mvp2/bug_baseline_phase_test_isolation/feature_spec.md) | Bug | The three 
`TestComputeBaselineWaitS` cases pass standalone — `.venv/bin/python -m pytest backend/tests/unit/workers/test_orchestrator_baseline_phase.py -p no:randomly` is all-green with no reliance on | — | — | -| 12 | P2 | [bug_judgment_header_omits_click_bucket](planned_features/02_mvp2/bug_judgment_header_omits_click_bucket/feature_spec.md) | Bug | The header renders all three buckets (`llm`, `human`, `click`) so the displayed terms sum to the displayed total count, making the doc-comment claim ("the UI's source-breakdown card now renders all th | — | — | +| 1 | P1 | [feat_overnight_final_solution](planned_features/02_mvp2/feat_overnight_final_solution/feature_spec.md) | Feature | The wizard exposes a strategy choice alongside the existing depth: keep today's predictable `narrow` loop OR opt into `follow_suggestions`, which lets each chain link consume the parent digest's top * | — | deferred: Phase 2, Phase 3 | +| 2 | P2 | [feat_apply_path_normalizer_declaration](planned_features/02_mvp2/feat_apply_path_normalizer_declaration/feature_spec.md) | Feature | The winning normalizer ships as a **structured, language-agnostic manifest** in the config-repo PR — not just prose. | — | — | +| 3 | P2 | [feat_overnight_studies_summary_card](planned_features/02_mvp2/feat_overnight_studies_summary_card/feature_spec.md) | Feature | A "ran while you were away" card surfaces at the top of `/studies` when at least one overnight chain has completed since the operator's last visit. 
| — | [PR #343](https://github.com/SoundMindsAI/relyloop/pull/343) | +| 4 | P2 | [feat_query_normalization_tuning](planned_features/02_mvp2/feat_query_normalization_tuning/feature_spec.md) | Feature | A template that opts in by declaring `query_normalizer` as a Categorical param gets the Optuna loop deciding empirically — on the operator's judgment set — whether lowercasing, trimming, or contractio | — | — | +| 5 | P2 | [feat_query_normalizer_typed_pipeline](planned_features/02_mvp2/feat_query_normalizer_typed_pipeline/feature_spec.md) | Feature | A new typed search-space member `NormalizerPipelineParam` lets a template declare an **ordered list of normalization steps**; the Optuna loop samples over the powerset of declared steps and proposes t | — | — | +| 6 | P2 | [feat_ubi_llm_study_comparison](planned_features/02_mvp2/feat_ubi_llm_study_comparison/feature_spec.md) | Feature | A single dedicated route `/studies/compare?a={id}&b={id}` renders the two studies side-by-side with a per-panel diff column: a sentence-level digest-narrative diff, a best-trial parameter table with s | — | [PR #320](https://github.com/SoundMindsAI/relyloop/pull/320) | +| 7 | P2 | [chore_arq_pool_aclose_deprecation](planned_features/02_mvp2/chore_arq_pool_aclose_deprecation/feature_spec.md) | Chore | Both call sites use `await arq_pool.aclose()`; no `DeprecationWarning` on shutdown; a regression guard asserts the async-correct form on both paths so a future edit cannot silently reintroduce `close( | — | — | +| 8 | P2 | [chore_cluster_detail_rung_badge](planned_features/02_mvp2/chore_cluster_detail_rung_badge/feature_spec.md) | Chore | The cluster-detail page surfaces a `` for the cluster, scoped by a user-selected (or auto-seeded) query set + target. 
| — | [PR #320](https://github.com/SoundMindsAI/relyloop/pull/320) | +| 9 | P2 | [chore_demo_seeding_integration_tests_rewrite](planned_features/02_mvp2/chore_demo_seeding_integration_tests_rewrite/feature_spec.md) | Chore | The 9 skipped cases are rewritten to the async "POST + poll-until-terminal" shape, the timeout case is re-homed to the worker layer, a new `AC-Async` case asserts the `running → complete` polling tran | — | [PR #286](https://github.com/SoundMindsAI/relyloop/pull/286) | +| 10 | P2 | [chore_studies_post_arq_spy_fixture](planned_features/02_mvp2/chore_studies_post_arq_spy_fixture/feature_spec.md) | Chore | A reusable `arq_pool_spy` integration fixture that records every `enqueue_job(name, *args)` call, letting studies-POST tests positively assert `spy.calls == []` on rejection and `spy.calls == [("start | — | — | +| 11 | P2 | [chore_ubi_reader_search_after_pagination](planned_features/02_mvp2/chore_ubi_reader_search_after_pagination/feature_spec.md) | Chore | A new engine-neutral `SearchAdapter.scan_all` cursor-scan lets `UbiReader` iterate the **entire** matching event/query stream for a window (subject to a caller ceiling), folding each page into the agg | — | [PR #413](https://github.com/SoundMindsAI/relyloop/pull/413) | +| 12 | P2 | [bug_baseline_phase_test_isolation](planned_features/02_mvp2/bug_baseline_phase_test_isolation/feature_spec.md) | Bug | The three `TestComputeBaselineWaitS` cases pass standalone — `.venv/bin/python -m pytest backend/tests/unit/workers/test_orchestrator_baseline_phase.py -p no:randomly` is all-green with no reliance on | — | — | +| 13 | P2 | [bug_judgment_header_omits_click_bucket](planned_features/02_mvp2/bug_judgment_header_omits_click_bucket/feature_spec.md) | Bug | The header renders all three buckets (`llm`, `human`, `click`) so the displayed terms sum to the displayed total count, making the doc-comment claim ("the UI's source-breakdown card now renders all th | — | — | ### Spec (0) _None._ -### Idea (15) 
+### Idea (17) | # | Priority | Feature | Type | One-liner | Depends on | Status | |---|---|---|---|---|---|---| -| 1 | P2 | [infra_smoke_fork_pr_secret_skip](planned_features/02_mvp2/infra_smoke_fork_pr_secret_skip/idea.md) | Infra | `.github/workflows/pr.yml` triggers on `pull_request:` ([pr.yml:43](../.github/workflows/pr.yml)) — **not** `pull_request_target`. GitHub deliberately withholds repository secrets from workflows trigg | — | Idea — tangential discovery while merging PR #387 (`chore_arq_pool_aclose_deprecation`) | -| 2 | P2 | [chore_demo_reseed_partial_completion_fast_test](planned_features/02_mvp2/chore_demo_reseed_partial_completion_fast_test/idea.md) | Chore | `infra_solr_ci_readiness` made the demo reseed engine-tolerant: when an engine is unreachable, its scenario is skipped, the reseed completes with `status="complete"` and a non-empty `scenarios_skipped | — | Idea — tangential discovery during `infra_solr_ci_readiness` Story 1.2 implementation | -| 3 | P2 | [chore_pr_yml_parallelize_backend_job](planned_features/02_mvp2/chore_pr_yml_parallelize_backend_job/idea.md) | Chore | `.github/workflows/pr.yml` has a job named `backend (lint + typecheck + tests + coverage)` that runs four sequential things in one job: ruff/lint, mypy, the full pytest matrix (unit + integration + co | — | Idea — captured during PR #426 CI watch | -| 4 | P2 | [chore_solr_post_pipeline_followups](planned_features/02_mvp2/chore_solr_post_pipeline_followups/idea.md) | Chore | The 13-story `infra_adapter_solr` execution surfaced several follow-on items that fit neither the original spec nor any sister feature folder. 
None block the MVP2 Solr release — they're operator-exper | — | Idea — tangential observations from `infra_adapter_solr` end-to-end | -| 5 | P2 | [chore_ubi_hybrid_template_render](planned_features/02_mvp2/chore_ubi_hybrid_template_render/idea.md) | Chore | Idea — contract decision deferred (NOT a worker bug) | — | Idea — contract decision deferred (NOT a worker bug) | -| 6 | P2 | [bug_e2e_teardown_chain_node_delete_500](planned_features/02_mvp2/bug_e2e_teardown_chain_node_delete_500/idea.md) | Bug | The E2E global-teardown deletes seeded rows in a fixed order (per `chore_e2e_test_rows_isolation` Story 1.2 cleanup registration). For auto-followup **chains**, the seeded nodes are `queued` studies c | — | Idea — tangential discovery during `feat_overnight_autopilot` (Story 4.2 E2E, PR forthcoming) | -| 7 | P2 | [bug_relyloop_spec_ubi_section_drift](planned_features/02_mvp2/bug_relyloop_spec_ubi_section_drift/idea.md) | Bug | [`docs/00_overview/relyloop-spec.md`](relyloop-spec.md) §"Click-derived judgments — OpenSearch UBI as the engine-neutral primary path" (line ~706) carries two staleness bugs from the 2026-05-27 releas | — | Idea — captured during `feat_ubi_judgments` preflight (2026-05-29) | -| 8 | P2 | [bug_reseed_failure_blocks_retry_arq_singleton_dedup](planned_features/02_mvp2/bug_reseed_failure_blocks_retry_arq_singleton_dedup/idea.md) | Bug | `run_demo_reseed` is enqueued with a fixed Arq job id `demo_reseed:singleton` (the singleton concurrency guard). 
When a run reaches a terminal state, Arq stores its **result** under `arq:result:demo_r | — | Idea — tangential discovery while verifying `fix(demo): add Solr (8983) to the reseed engine host-URL mapping` (branch `feat_demo_reseed_solr_and_steplog`) | -| 9 | P2 | [bug_seed_meaningful_demos_silent_bulk_errors](planned_features/02_mvp2/bug_seed_meaningful_demos_silent_bulk_errors/idea.md) | Bug | [`scripts/seed_meaningful_demos.py:917-935`](../../scripts/seed_meaningful_demos.py#L917-L935) bulk-indexes 1000 Amazon ESCI products into a dedicated index per demo scenario: | — | Idea — captured during `bug_smoke_seed_es_unavailable_shards_race` Phase 2.5 tangential sweep | -| 10 | P2 | [bug_studies_detail_vitest_intermittent_timeout](planned_features/02_mvp2/bug_studies_detail_vitest_intermittent_timeout/idea.md) | Bug | Under the full `pnpm test` run (`vitest run`, default worker pool), the Study-detail-page render test sometimes blocks past the 5 s `testTimeout` default — but the test itself is data-driven from mock | — | Idea — captured during `chore_template_library_expansion` post-impl tangential sweep | -| 11 | P2 | [bug_webhook_concurrent_merge_race_timing_sensitive](planned_features/02_mvp2/bug_webhook_concurrent_merge_race_timing_sensitive/idea.md) | Bug | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | — | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | -| 12 | Backlog | [feat_fts_rank_ordering](planned_features/02_mvp2/feat_fts_rank_ordering/idea.md) | Feature | `feat_data_table_primitive` shipped filter-only FTS — `?q=foo` matches rows where `search_vector @@ plainto_tsquery('english', 'foo')` is true but orders results by `created_at DESC, id DESC` (the def | — | Idea — deferred from `feat_data_table_primitive` (MVP1) per spec §16. 
| -| 13 | Backlog | [infra_arq_subprocess_test](planned_features/02_mvp2/infra_arq_subprocess_test/idea.md) | Infra | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; | — | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; a subprocess test would add a narrow Arq-version-regression guard. | -| 14 | Backlog | [chore_auto_followup_parent_advisory_lock](planned_features/02_mvp2/chore_auto_followup_parent_advisory_lock/idea.md) | Chore | The shipped `feat_auto_followup_studies` worker uses a two-layer idempotency scheme: | — | Idea — captured as a standalone file to resolve broken cross-references in `feat_auto_followup_studies` D-11 + plan F2 + `bug_auto_followup_completed_parent_stop_chain_race/idea.md`. The slug was coined 2026-05-24 in D-11 but only existed as descriptive prose across other documents until now. | -| 15 | Backlog | [bug_chat_long_conversation_truncation](planned_features/02_mvp2/bug_chat_long_conversation_truncation/idea.md) | Bug | [`backend/app/services/agent_chat.send_user_message`](../../backend/app/services/agent_chat.py) defensively caps the OpenAI history at the most recent `HISTORY_MAX_MESSAGES = 100` messages… | — | Held for MVP2 (decided 2026-05-13). Folder renamed with `_mvp2` suffix to make the deferral visible at-a-glance in `ls docs/00_overview/planned_features/`. Resume work when MVP2 starts — no technical dependency on MVP2 infra (audit_log is N/A; Langfuse is convenience only); the deferral is scope discipline + zero current impact (latent bug, no operator has hit the 100-message cap). 
| +| 1 | P2 | [feat_proposal_full_param_space_view](planned_features/02_mvp2/feat_proposal_full_param_space_view/idea.md) | Feature | The proposal detail page surfaces `config_diff` — the subset of parameters the study **tuned** — and the winning values for them. Today's example proposal carries `{boost: {from: 1.0, to: 2.5}}` and r | — | Idea — user request during the same session as `feat_overnight_final_solution` | +| 2 | P2 | [infra_smoke_fork_pr_secret_skip](planned_features/02_mvp2/infra_smoke_fork_pr_secret_skip/idea.md) | Infra | `.github/workflows/pr.yml` triggers on `pull_request:` ([pr.yml:43](../.github/workflows/pr.yml)) — **not** `pull_request_target`. GitHub deliberately withholds repository secrets from workflows trigg | — | Idea — tangential discovery while merging PR #387 (`chore_arq_pool_aclose_deprecation`) | +| 3 | P2 | [chore_demo_reseed_partial_completion_fast_test](planned_features/02_mvp2/chore_demo_reseed_partial_completion_fast_test/idea.md) | Chore | `infra_solr_ci_readiness` made the demo reseed engine-tolerant: when an engine is unreachable, its scenario is skipped, the reseed completes with `status="complete"` and a non-empty `scenarios_skipped | — | Idea — tangential discovery during `infra_solr_ci_readiness` Story 1.2 implementation | +| 4 | P2 | [chore_e2e_overnight_strategy_radix_select_timing](planned_features/02_mvp2/chore_e2e_overnight_strategy_radix_select_timing/idea.md) | Chore | The Story 3.2 E2E spec walks the create-study wizard to Step 5, clicks the depth `<Select>` ("2 follow-ups"), and asserts the new Strategy `<Select>` becomes visible.
In chromium against `pnpm dev`, t | — | Idea — tangential follow-up captured during `feat_overnight_final_solution` Story 3.2 implementation | +| 5 | P2 | [chore_pr_yml_parallelize_backend_job](planned_features/02_mvp2/chore_pr_yml_parallelize_backend_job/idea.md) | Chore | `.github/workflows/pr.yml` has a job named `backend (lint + typecheck + tests + coverage)` that runs four sequential things in one job: ruff/lint, mypy, the full pytest matrix (unit + integration + co | — | Idea — captured during PR #426 CI watch | +| 6 | P2 | [chore_solr_post_pipeline_followups](planned_features/02_mvp2/chore_solr_post_pipeline_followups/idea.md) | Chore | The 13-story `infra_adapter_solr` execution surfaced several follow-on items that fit neither the original spec nor any sister feature folder. None block the MVP2 Solr release — they're operator-exper | — | Idea — tangential observations from `infra_adapter_solr` end-to-end | +| 7 | P2 | [chore_ubi_hybrid_template_render](planned_features/02_mvp2/chore_ubi_hybrid_template_render/idea.md) | Chore | Idea — contract decision deferred (NOT a worker bug) | — | Idea — contract decision deferred (NOT a worker bug) | +| 8 | P2 | [bug_e2e_teardown_chain_node_delete_500](planned_features/02_mvp2/bug_e2e_teardown_chain_node_delete_500/idea.md) | Bug | The E2E global-teardown deletes seeded rows in a fixed order (per `chore_e2e_test_rows_isolation` Story 1.2 cleanup registration). 
For auto-followup **chains**, the seeded nodes are `queued` studies c | — | Idea — tangential discovery during `feat_overnight_autopilot` (Story 4.2 E2E, PR forthcoming) | +| 9 | P2 | [bug_relyloop_spec_ubi_section_drift](planned_features/02_mvp2/bug_relyloop_spec_ubi_section_drift/idea.md) | Bug | [`docs/00_overview/relyloop-spec.md`](relyloop-spec.md) §"Click-derived judgments — OpenSearch UBI as the engine-neutral primary path" (line ~706) carries two staleness bugs from the 2026-05-27 releas | — | Idea — captured during `feat_ubi_judgments` preflight (2026-05-29) | +| 10 | P2 | [bug_reseed_failure_blocks_retry_arq_singleton_dedup](planned_features/02_mvp2/bug_reseed_failure_blocks_retry_arq_singleton_dedup/idea.md) | Bug | `run_demo_reseed` is enqueued with a fixed Arq job id `demo_reseed:singleton` (the singleton concurrency guard). When a run reaches a terminal state, Arq stores its **result** under `arq:result:demo_r | — | Idea — tangential discovery while verifying `fix(demo): add Solr (8983) to the reseed engine host-URL mapping` (branch `feat_demo_reseed_solr_and_steplog`) | +| 11 | P2 | [bug_seed_meaningful_demos_silent_bulk_errors](planned_features/02_mvp2/bug_seed_meaningful_demos_silent_bulk_errors/idea.md) | Bug | [`scripts/seed_meaningful_demos.py:917-935`](../../scripts/seed_meaningful_demos.py#L917-L935) bulk-indexes 1000 Amazon ESCI products into a dedicated index per demo scenario: | — | Idea — captured during `bug_smoke_seed_es_unavailable_shards_race` Phase 2.5 tangential sweep | +| 12 | P2 | [bug_studies_detail_vitest_intermittent_timeout](planned_features/02_mvp2/bug_studies_detail_vitest_intermittent_timeout/idea.md) | Bug | Under the full `pnpm test` run (`vitest run`, default worker pool), the Study-detail-page render test sometimes blocks past the 5 s `testTimeout` default — but the test itself is data-driven from mock | — | Idea — captured during `chore_template_library_expansion` post-impl tangential sweep | +| 13 | P2 | 
[bug_webhook_concurrent_merge_race_timing_sensitive](planned_features/02_mvp2/bug_webhook_concurrent_merge_race_timing_sensitive/idea.md) | Bug | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | — | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | +| 14 | Backlog | [feat_fts_rank_ordering](planned_features/02_mvp2/feat_fts_rank_ordering/idea.md) | Feature | `feat_data_table_primitive` shipped filter-only FTS — `?q=foo` matches rows where `search_vector @@ plainto_tsquery('english', 'foo')` is true but orders results by `created_at DESC, id DESC` (the def | — | Idea — deferred from `feat_data_table_primitive` (MVP1) per spec §16. | +| 15 | Backlog | [infra_arq_subprocess_test](planned_features/02_mvp2/infra_arq_subprocess_test/idea.md) | Infra | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; | — | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; a subprocess test would add a narrow Arq-version-regression guard. | +| 16 | Backlog | [chore_auto_followup_parent_advisory_lock](planned_features/02_mvp2/chore_auto_followup_parent_advisory_lock/idea.md) | Chore | The shipped `feat_auto_followup_studies` worker uses a two-layer idempotency scheme: | — | Idea — captured as a standalone file to resolve broken cross-references in `feat_auto_followup_studies` D-11 + plan F2 + `bug_auto_followup_completed_parent_stop_chain_race/idea.md`. The slug was coined 2026-05-24 in D-11 but only existed as descriptive prose across other documents until now. 
| +| 17 | Backlog | [bug_chat_long_conversation_truncation](planned_features/02_mvp2/bug_chat_long_conversation_truncation/idea.md) | Bug | [`backend/app/services/agent_chat.send_user_message`](../../backend/app/services/agent_chat.py) defensively caps the OpenAI history at the most recent `HISTORY_MAX_MESSAGES = 100` messages… | — | Held for MVP2 (decided 2026-05-13). Folder renamed with `_mvp2` suffix to make the deferral visible at-a-glance in `ls docs/00_overview/planned_features/`. Resume work when MVP2 starts — no technical dependency on MVP2 infra (audit_log is N/A; Langfuse is convenience only); the deferral is scope discipline + zero current impact (latent bug, no operator has hit the 100-message cap). | ## Dependency graph @@ -123,6 +126,8 @@ graph LR class chore_ubi_reader_search_after_pagination plan; feat_apply_path_normalizer_declaration["apply path normalizer declaration"] class feat_apply_path_normalizer_declaration plan; + feat_overnight_final_solution["overnight final solution"] + class feat_overnight_final_solution plan; feat_overnight_studies_summary_card["overnight studies summary card"] class feat_overnight_studies_summary_card plan; feat_query_normalization_tuning["query normalization tuning"] diff --git a/docs/00_overview/backlog_dashboard.html b/docs/00_overview/backlog_dashboard.html index 89b77fe0..ef5f310a 100644 --- a/docs/00_overview/backlog_dashboard.html +++ b/docs/00_overview/backlog_dashboard.html @@ -369,7 +369,7 @@

RelyLoop BACKLOG Dashboard

- Reflects feature-folder state as of 2026-06-03 (latest mtime of any + Reflects feature-folder state as of 2026-06-04 (latest mtime of any docs/00_overview/planned_features/ or docs/00_overview/implemented_features/ file). See state.md for the active branch context, diff --git a/docs/00_overview/dashboard.html b/docs/00_overview/dashboard.html index 6c2138f8..e4099445 100644 --- a/docs/00_overview/dashboard.html +++ b/docs/00_overview/dashboard.html @@ -368,7 +368,7 @@

RelyLoop — Release Roadmap

- Top-level index across MVP1 → GA v1+ as of 2026-06-03. Click a release name to + Top-level index across MVP1 → GA v1+ as of 2026-06-04. Click a release name to drill into the per-release dashboard. Theme labels sourced from tech-stack.md §"Canonical release matrix". See state.md for @@ -392,7 +392,7 @@

Releases

Three-Engine + Real Signals
-
14 / 24 scoped done · 24 remaining
+
14 / 25 scoped done · 26 remaining
In progress
diff --git a/docs/00_overview/mvp1_dashboard.html b/docs/00_overview/mvp1_dashboard.html index 9a168d7d..f616d0ec 100644 --- a/docs/00_overview/mvp1_dashboard.html +++ b/docs/00_overview/mvp1_dashboard.html @@ -369,7 +369,7 @@

RelyLoop MVP1 Dashboard

- Reflects feature-folder state as of 2026-06-03 (latest mtime of any + Reflects feature-folder state as of 2026-06-04 (latest mtime of any docs/00_overview/planned_features/ or docs/00_overview/implemented_features/ file). See state.md for the active branch context, diff --git a/docs/00_overview/mvp2_dashboard.html b/docs/00_overview/mvp2_dashboard.html index 7327a67a..cf72272c 100644 --- a/docs/00_overview/mvp2_dashboard.html +++ b/docs/00_overview/mvp2_dashboard.html @@ -369,7 +369,7 @@

RelyLoop MVP2 Dashboard

- Reflects feature-folder state as of 2026-06-03 (latest mtime of any + Reflects feature-folder state as of 2026-06-04 (latest mtime of any docs/00_overview/planned_features/ or docs/00_overview/implemented_features/ file). See state.md for the active branch context, @@ -397,13 +397,13 @@

MVP2 Progress

Specced features done
-
14 / 24
-
58% specced · 43 filed under MVP2
-
+
14 / 25
+
56% specced · 46 filed under MVP2
+
Pending work
-
27
+
30
every not-done feat/infra/chore/bug across all priorities
@@ -420,12 +420,12 @@

MVP2 Progress

P1
-
0
+
1
high-value, ready when P0 clears
P2 (default)
-
23
+
25
important to file, not blocking
@@ -435,14 +435,14 @@

MVP2 Progress

Legacy "Path to MVP2"
-
24
+
26
scoped not-done + bugs + chore-ideas only (excludes feat/infra ideas)
Backlog ideas: - 3 idea-only feat/infra folders (not yet scoped into MVP2) + 4 idea-only feat/infra folders (not yet scoped into MVP2) In flight: @@ -463,7 +463,20 @@

Pipeline

-

Idea 15

+

Idea 17

+ +
+ +
+ Feature + P2 + +
+
The proposal detail page surfaces `config_diff` — the subset of parameters the study **tuned** — and the winning values for them. Today's example proposal carries `{boost: {from: 1.0, to: 2.5}}` and r
+ + +
+
@@ -491,6 +504,19 @@

Idea 15

+
+ +
+ Chore + P2 + +
+
The Story 3.2 E2E spec walks the create-study wizard to Step 5, clicks the depth `<Select>` ("2 follow-ups"), and asserts the new Strategy `<Select>` becomes visible. In chromium against `pnpm dev`, t
+ + +
+ +
@@ -667,7 +693,20 @@

Spec 0

-

Plan 12

+

Plan 13

+ +
+ +
+ Feature + P1 + +
+
The wizard exposes a strategy choice alongside the existing depth: keep today's predictable `narrow` loop OR opt into `follow_suggestions`, which lets each chain link consume the parent digest's top *
+
deferred: Phase 2, Phase 3
+ +
+
@@ -1066,6 +1105,8 @@

Dependency graph (feat_ + infra_)

class chore_ubi_reader_search_after_pagination plan; feat_apply_path_normalizer_declaration["apply path normalizer declaration"] class feat_apply_path_normalizer_declaration plan; + feat_overnight_final_solution["overnight final solution"] + class feat_overnight_final_solution plan; feat_overnight_studies_summary_card["overnight studies summary card"] class feat_overnight_studies_summary_card plan; feat_query_normalization_tuning["query normalization tuning"] @@ -1121,6 +1162,8 @@

Dependency graph (feat_ + infra_)

class chore_ubi_reader_search_after_pagination plan; feat_apply_path_normalizer_declaration["apply path normalizer declaration"] class feat_apply_path_normalizer_declaration plan; + feat_overnight_final_solution["overnight final solution"] + class feat_overnight_final_solution plan; feat_overnight_studies_summary_card["overnight studies summary card"] class feat_overnight_studies_summary_card plan; feat_query_normalization_tuning["query normalization tuning"] diff --git a/docs/00_overview/planned_features/02_mvp2/chore_e2e_overnight_strategy_radix_select_timing/idea.md b/docs/00_overview/planned_features/02_mvp2/chore_e2e_overnight_strategy_radix_select_timing/idea.md new file mode 100644 index 00000000..12e92b75 --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/chore_e2e_overnight_strategy_radix_select_timing/idea.md @@ -0,0 +1,57 @@ +# Stabilize the overnight Strategy toggle E2E (Radix Select + react-hook-form timing) + +**Date:** 2026-06-03 +**Status:** Idea — tangential follow-up captured during `feat_overnight_final_solution` Story 3.2 implementation +**Priority:** P2 +**Origin:** Story 3.2 of `feat_overnight_final_solution` planned an `ui/tests/e2e/overnight-strategy.spec.ts` that exercises the wizard's new Strategy toggle end-to-end (AC-4 visibility + AC-5 wire submission). The spec was written and works at the JSX level — the 6 vitest cases at [`create-study-modal.overnight-strategy.test.tsx`](../../../../ui/src/__tests__/components/studies/create-study-modal.overnight-strategy.test.tsx) all pass — but the chromium-against-dev-server pass fails consistently: after clicking the depth Radix `` ("2 follow-ups"), and asserts the new Strategy `` at `ui/src/components/studies/create-study-modal.tsx` (FR-2). | +| `StudyChainLink.selected_followup_kind` | `narrow_default`, `narrow`, `widen`, `swap_template` (or `null`) | New module-level Literal `SELECTED_FOLLOWUP_KIND_VALUES: tuple[str, ...]` in `backend/app/domain/study/auto_followup_strategy.py`. 
Cite in `ui/src/lib/enums.ts` per the Story 2.13 lint guard. | Per-link badge in `auto-followup-chain-panel.tsx` (FR-7). | + +The four `FOLLOWUP_KIND_VALUES` (`narrow`, `widen`, `text`, `swap_template`) at [`backend/app/domain/study/followups.py:158`](../../../../backend/app/domain/study/followups.py#L158) remain the source of truth for the digest's discriminated union — unchanged. + +### 8.6 Error code catalog + +| Code | HTTP Status | Meaning | +|------|-------------|---------| +| `AUTO_FOLLOWUP_STRATEGY_INVALID` | `422` | `config.auto_followup_strategy` is set without `auto_followup_depth >= 1`, OR carries a value other than `"narrow"` / `"follow_suggestions"`, OR the request sets a worker-managed key (`auto_followup_visited_template_ids` / `auto_followup_selected_kind`) at study creation. | + +## 9) Data model and state transitions + +### New/changed entities + +**No schema changes. No migration.** + +`studies.config` (JSONB) gains three optional keys, all written by the wizard or worker as part of the existing INSERT: + +| Key | Type | Set by | Notes | +|---|---|---|---| +| `auto_followup_strategy` | `"narrow" \| "follow_suggestions" \| absent` | Wizard (FR-2), inherited by autopilot children (FR-3) | Absent + `"narrow"` are equivalent to the worker. | +| `auto_followup_visited_template_ids` | `list[str]` | Autopilot worker under `follow_suggestions` only (FR-5) | Never set under `"narrow"` strategy. | +| `auto_followup_selected_kind` | `"narrow_default" \| "narrow" \| "widen" \| "swap_template" \| absent` | Autopilot worker (FR-3 + FR-5) | Absent for anchor + for `"narrow"` strategy. | + +The existing `studies.config.auto_followup_depth` is read + decremented unchanged. + +The existing `digests.suggested_followups` is read (new consumer); never written by this feature. + +### Required invariants + +- **Strategy is inherited verbatim.** A chain's anchor sets `auto_followup_strategy` at study creation; every descendant under that chain MUST carry the same value in `child.config.auto_followup_strategy`.
The worker is the only writer of this key on autopilot-created children; it copies `parent.config.auto_followup_strategy` without modification. +- **Visited-template guard MUST hold.** Under `follow_suggestions`, `select_executable_followup` MUST exclude any `SwapTemplateFollowup` whose `template_id` is already in `parent.config.auto_followup_visited_template_ids`. The set is constructed from the parent's persisted list (defaulting to `[parent.template_id]` when absent — the anchor case). +- **Fallback-to-narrow MUST run when no executable candidate is selected.** The worker MUST NOT emit SKIP_NO_LIFT or any non-ENQUEUE outcome because of an empty selection result. The narrow path is the safety net. +- **`selected_followup_kind` is informational only.** The field is on `StudyChainLink` for surfacing; the worker writes it to `child.config.auto_followup_selected_kind`. It MUST NOT be consulted by `evaluate_chain_gate` or any other gate — it's a post-decision audit field. +- **Wire backward compatibility.** A study created with `config = {auto_followup_depth: 3}` (no strategy key) MUST produce byte-identical worker behavior to today. The migration story is "no migration" — every existing row already satisfies the new contract. +- **`auto_followup_strategy` ⇒ depth ≥ 1.** Pair validator at the API. The `_validate_auto_followup_strategy` validator MUST raise the `AUTO_FOLLOWUP_STRATEGY_INVALID:` prefixed `ValueError` when the pair rule is violated. + +### State transitions + +`Study.status` transitions are unchanged. The strategy field doesn't move studies between states — it shapes which template/search-space the autopilot uses when creating the next link. + +The worker's internal dispatch (added by FR-3) introduces these outcome paths. 
Note the **two distinct rows** for the narrow path — legacy-or-default chains (strategy `None` / `"narrow"`) persist NO `auto_followup_selected_kind` key, while `follow_suggestions`-fallback chains persist `"narrow_default"` (cycle 2 finding C2-A3 + D-12): + +| Worker outcome | Strategy active | Telemetry event | `child.config.auto_followup_selected_kind` | +|---|---|---|---| +| **Legacy/default narrow path** | `None` or `"narrow"` | (no new event; existing `auto_followup_enqueued` only) | (key NOT persisted — worker pops before INSERT) | +| Follow-up consumed: `narrow` | `"follow_suggestions"` | `auto_followup_strategy_selected` + `auto_followup_enqueued` | `"narrow"` | +| Follow-up consumed: `widen` | `"follow_suggestions"` | same | `"widen"` | +| Follow-up consumed: `swap_template` (target exists) | `"follow_suggestions"` | same | `"swap_template"` | +| Swap target deleted → fallback | `"follow_suggestions"` | `auto_followup_swap_target_missing` (WARN) + `auto_followup_enqueued` | `"narrow_default"` | +| **`follow_suggestions` fallback (no candidate)** | `"follow_suggestions"` | `auto_followup_no_executable_candidate_fell_back_to_narrow` + `auto_followup_enqueued` | `"narrow_default"` | + +### Idempotency/replay behavior + +The strategy dispatch is **deterministic**. Given the same `parent.config`, `parent` row, `best_trial.params`, `template.declared_params`, and `digest.suggested_followups`, the worker produces the same child every invocation. Combined with the existing `_job_id`-based layer-1 idempotency at [`digest.py:1302`](../../../../backend/workers/digest.py#L1302) and the `list_children_of_study` layer-2 backstop at [`auto_followup.py:91-99`](../../../../backend/workers/auto_followup.py#L91-L99), replays produce identical results — no new replay risk introduced. + +## 10) Security, privacy, and compliance + +- **Threats:** + - A malicious or compromised digest could persist a `SwapTemplateFollowup` pointing at an arbitrary `template_id`. 
The autopilot would create a child against that template. Mitigated by: (a) the digest worker already validates `template_id` (36-char UUID, must exist) via `remap_search_space_for_swap_target` before persisting, (b) RelyLoop's single-tenant MVP2 posture means there's no cross-tenant template surface to attack, (c) the cycle guard prevents repeated re-entry. + - A misbehaving LLM output could cause the autopilot to always pick `swap_template` and starve narrowing chains. Mitigated by the `"narrow"` strategy remaining the default — operators opt in to the broader behavior explicitly. +- **Controls:** None new — relies on the digest worker's existing validation chain. +- **Secrets/key handling:** N/A — no secrets touched, no new LLM call. +- **Auditability:** Three new structlog events (2 INFO + 1 WARN per FR-8). `audit_log` lands at MVP3. +- **Data retention/deletion/export impact:** N/A. + +## 11) UX flows and edge cases + +### Information architecture + +- **Navigation placement (wizard):** Strategy toggle is a NEW row directly beneath the existing depth selector at Step 5 ("Objective + config"). Same modal, same step, no new screen. +- **Navigation placement (chain panel):** No change. The new `selected_followup_kind` badge renders inline within each link's existing list item. +- **Labeling taxonomy:** + - Wizard depth label (existing, unchanged): `"🌙 Run overnight (compound automatically)"`. + - **Strategy label (new):** `"Strategy"` (compact — the toggle's two display labels carry the explanation). + - Strategy options: `"Refine the same knobs (predictable)"` (wire `"narrow"`) | `"Try suggested follow-ups (broader exploration)"` (wire `"follow_suggestions"`). + - Helper text under the strategy toggle: per FR-2. + - Chain panel per-link badges: `"refined"`, `"narrow ↓"`, `"widen ↑"`, `"swapped to {template_name}"` (per FR-7). +- **Content hierarchy (wizard):** Depth row first, strategy row immediately below — same visual cluster. 
The strategy row appears with the same `space-y-1.5` spacing used by other Step-5 controls. +- **Progressive disclosure:** Strategy toggle is hidden when depth = `Off` (matches the pair-validator semantic — no point picking a strategy for a one-shot study). Appears the moment depth ≥ 1 is selected. +- **Relationship to existing pages:** Pure additive surface — extends `feat_overnight_autopilot`'s wizard step + chain panel. + +### Tooltips and contextual help + +| Element | Tooltip / help text | Trigger | Placement | Glossary key | +|---|---|---|---|---| +| Wizard label `"Strategy"` | (short) How each follow-up is chosen. Refine: tighter bounds on the same knobs. Try suggestions: digest's top runnable recommendation. | hover/focus on info icon | right of label | `overnight_strategy` (NEW — FR-9) | +| Chain link badge `"swapped to {template_name}"` | (short) Try suggestions strategy: this link followed the digest's swap_template recommendation and ran against a different query template. | hover/focus | right of badge | `overnight_strategy` (existing, reused) | + +### Primary flows + +1. **Cross-knob overnight discovery flow.** Operator opens the create-study modal → Step 5 → picks `Deep (1000)` preset → sees the existing overnight hint → toggles `🌙 Run overnight` to depth 3 → new strategy toggle appears → picks `"Try suggested follow-ups (broader exploration)"` → submits. Anchor runs, completes overnight; the autopilot worker reads the anchor's digest, picks the top executable follow-up (say a `widen` on `title_boost`), creates link 2 with the widened bounds. Link 2 completes; the worker picks link 2's top executable (say a `swap_template` to `function-score-v1`), creates link 3 against that template. Link 3 plateaus (`no_lift`); chain stops. 
Operator wakes up to `/studies/{anchor_id}` → chain panel shows the three links with their `selected_followup_kind` badges → cumulative lift = +18% → best link is #3 with a proposal — clicks "Best config" CTA → lands on the proposal page → opens the PR. +2. **Cycle-guard prevention flow.** Anchor runs (template A), digest suggests `swap_template → B`. Worker creates link 2 against B. Link 2's digest suggests `swap_template → A` AND `narrow`. The autopilot's cycle guard sees A in `visited_template_ids` and drops the swap; picks the `narrow` instead. Link 3 runs against B with narrowed bounds. Operator sees the cycle-guard activity in the `dropped_template_ids` field of the `auto_followup_strategy_selected` log event for link 3 (`dropped_template_ids = ["TEMPLATE_A"]`, `selected_kind = "narrow"`) — one line tells the full story. +3. **Fallback-to-narrow flow.** Anchor runs, digest emits only `text` follow-ups ("re-run with bigger budget" — typical for `still_improving` verdict). Worker logs `auto_followup_no_executable_candidate_fell_back_to_narrow`, takes today's narrow path, creates link 2 with `selected_followup_kind = "narrow_default"`. Operator wakes up to a chain that still made progress (current behavior preserved); the chain panel renders link 2's badge as the lighter-weight `"refined"`. +4. **Backward-compatibility flow.** Operator picks `Off` depth → no strategy toggle visible → submits study with `config = {max_trials: 200, ...}` (no `auto_followup_depth`, no `auto_followup_strategy`). Worker behaves byte-identically to pre-feature. Zero observable change. +5. **Operator-pause-mid-chain flow.** Operator visits a mid-chain study → existing `POST /studies/{id}/cancel?cascade=true` halts pending children unchanged. The strategy doesn't affect the cancel path. + +### Edge/error flows + +- **Digest row missing under `follow_suggestions`.** Defensive: should not occur because the digest worker enqueues `enqueue_followup_study` AFTER persisting. 
If it does (e.g., manual digest deletion mid-chain): worker logs WARN, falls back to narrow. +- **Empty `suggested_followups` list.** `select_executable_followup` returns `None`; fallback fires. Chain continues with narrow. +- **All executable candidates filtered by cycle guard.** Same — `None` returned, fallback fires. +- **`SwapTemplateFollowup` references a deleted template.** Defensive: the worker's existing `repo.get_query_template` call at [`auto_followup.py:200`](../../../../backend/workers/auto_followup.py#L200) handles `None`; under `follow_suggestions` the worker MUST run the same defensive get against the swap target's `template_id` before INSERT. On miss: log WARN, fall back to narrow against `parent.template_id`. +- **`auto_followup_strategy = "follow_suggestions"` AND `auto_followup_depth = 0` at API.** Pair validator rejects at create with 422 `AUTO_FOLLOWUP_STRATEGY_INVALID`. +- **`auto_followup_strategy = "garbage_value"`.** The same validator's value check rejects at create with 422 `AUTO_FOLLOWUP_STRATEGY_INVALID`. + +### Recovery + +If the chain produces an unexpected swap_template result the operator wants to abort: existing cancel cascade (`POST /studies/{id}/cancel?cascade=true`) halts pending children — no change. + +## 12) Given/When/Then acceptance criteria + +### AC-1: Strategy validator pair-check (FR-1) + +- Given a `POST /api/v1/studies` request with `config = {auto_followup_strategy: "follow_suggestions"}` and no `auto_followup_depth` (or depth = 0) +- When the request is validated +- Then the response is `422 { "detail": { "error_code": "AUTO_FOLLOWUP_STRATEGY_INVALID", "message": "auto_followup_strategy only applies when auto_followup_depth >= 1", "retryable": false } }`. + +### AC-2: Strategy validator value-check (FR-1) + +- Given a `POST /api/v1/studies` request with `config = {auto_followup_depth: 3, auto_followup_strategy: "broaden_everything"}` +- When the request is validated +- Then the response is `422 AUTO_FOLLOWUP_STRATEGY_INVALID`.
+ +### AC-3: Default behavior unchanged (FR-3 backward compatibility) + +- Given a study with `config = {auto_followup_depth: 3}` (no `auto_followup_strategy` key) and a completed parent with a winning trial +- When `enqueue_followup_study` runs +- Then the child is created with `template_id = parent.template_id`, the search space narrowed ±50% around the winner (existing behavior), AND `child.config` contains **neither** `auto_followup_selected_kind` **nor** `auto_followup_visited_template_ids` (the legacy path persists neither new key per FR-3 + FR-5; cycle 1 finding C1-A1). `GET /chain` returns this link with `selected_followup_kind = null`. The existing `auto_followup_enqueued` telemetry event fires; **no new event** fires. + +### AC-4: Wizard toggle hidden when depth = 0 (FR-2) + +- Given the create-study modal is open at Step 5 with `auto_followup_depth = "Off"` +- When the form renders +- Then no element with `data-testid="cs-overnight-strategy"` is present in the DOM. Setting depth to 1 makes the toggle appear with `"narrow"` selected by default. + +### AC-5: Strategy persisted to config on submit (FR-2) + +- Given the operator picks `auto_followup_depth = 3` and `Strategy = "Try suggested follow-ups (broader exploration)"` +- When the form submits +- Then the request body's `config` contains `auto_followup_depth: 3` AND `auto_followup_strategy: "follow_suggestions"`. 
+ +### AC-6: `follow_suggestions` consumes top executable narrow follow-up (FR-3 + FR-4) + +- Given a parent study with `config = {auto_followup_depth: 3, auto_followup_strategy: "follow_suggestions"}` (no `auto_followup_visited_template_ids` key — anchor case), a completed digest carrying `suggested_followups = [{kind: "narrow", rationale: "...", search_space: {...}}, {kind: "text", rationale: "..."}]`, and the chain gate ENQUEUES +- When `enqueue_followup_study` runs +- Then the child is created with the `narrow` follow-up's `search_space` (verbatim, not re-narrowed), `template_id = parent.template_id`, `child.config.auto_followup_selected_kind = "narrow"`, `child.config.auto_followup_visited_template_ids = [parent.template_id]` (the ordered-unique list — since `child.template_id == parent.template_id` the list does not grow per FR-5 + cycle 1 finding C1-A5). Telemetry (emitted AFTER child INSERT): `auto_followup_strategy_selected` fires with `selected_kind = "narrow"`, `source_index = 0`, `candidate_count = 1`, `dropped_template_ids = []`, `child_study_id` populated. Existing `auto_followup_enqueued` also fires. + +### AC-7: `follow_suggestions` consumes `swap_template` and branches template (FR-3 + FR-4) + +- Given a parent study with `config = {auto_followup_depth: 2, auto_followup_strategy: "follow_suggestions", auto_followup_visited_template_ids: ["TEMPLATE_A"]}`, `parent.template_id = "TEMPLATE_A"`, and a digest with `suggested_followups = [{kind: "swap_template", template_id: "TEMPLATE_B", search_space: {...remapped...}, rationale: "..."}]` +- When `enqueue_followup_study` runs +- Then the child is created with `template_id = "TEMPLATE_B"`, the follow-up's `search_space` verbatim, `child.config.auto_followup_selected_kind = "swap_template"`, `child.config.auto_followup_visited_template_ids = ["TEMPLATE_A", "TEMPLATE_B"]`. Telemetry: `auto_followup_strategy_selected` fires with `selected_kind = "swap_template"`. 
+ +### AC-8: Cycle guard drops `swap_template` to already-visited template (FR-4 + FR-5) + +- Given a parent study with `config.auto_followup_strategy = "follow_suggestions"`, `config.auto_followup_visited_template_ids = ["TEMPLATE_A", "TEMPLATE_B"]`, and a digest with `suggested_followups = [{kind: "swap_template", template_id: "TEMPLATE_A", ...}, {kind: "widen", search_space: {...}}]` +- When `select_executable_followup` runs +- Then the swap_template to TEMPLATE_A is dropped; the `widen` is selected (`SelectionResult.item.kind == "widen"`, `source_index == 1`, `candidate_count == 1` after filter, `dropped_template_ids == ["TEMPLATE_A"]`). The child runs the `widen` follow-up with `selected_kind = "widen"`. Telemetry: `auto_followup_strategy_selected` fires (AFTER child INSERT) with `selected_kind = "widen"`, `source_index = 1`, `candidate_count = 1`, `dropped_template_ids = ["TEMPLATE_A"]`, `child_study_id` populated. + +### AC-9: Fallback-to-narrow on empty executable candidates (FR-3 + FR-4) + +- Given a parent study with `config.auto_followup_strategy = "follow_suggestions"` and a digest with `suggested_followups = [{kind: "text", rationale: "re-run with bigger budget"}, {kind: "text", rationale: "..."}]` (no executable items) +- When `enqueue_followup_study` runs +- Then `select_executable_followup` returns `None`; the worker takes today's narrow path; child is created with `template_id = parent.template_id`, narrowed search space, `child.config.auto_followup_selected_kind = "narrow_default"`, `child.config.auto_followup_visited_template_ids = [parent.template_id]`. Telemetry: `auto_followup_no_executable_candidate_fell_back_to_narrow` fires AFTER child INSERT with `child_study_id` populated, `digest_followup_kinds = ["text", "text"]`, `visited_template_id_count = 1`, `dropped_template_ids = []`. The chain does not stall. 
+ +### AC-10: Strategy inherited verbatim (FR-3 + FR-5) + +- Given a parent study with `config.auto_followup_strategy = "follow_suggestions"` and the worker successfully creates a child +- When the child row is inserted +- Then `child.config.auto_followup_strategy = "follow_suggestions"` (verbatim from parent). The child's own auto-followup, when it eventually runs, will also dispatch on the `follow_suggestions` branch. + +### AC-11: `StudyChainLink.selected_followup_kind` populated (FR-6) + +- Given a 3-link chain where link 2 has `config.auto_followup_selected_kind = "narrow"` and link 3 has `config.auto_followup_selected_kind = "swap_template"` +- When `GET /api/v1/studies/{any_link_id}/chain` is called +- Then `response.links[0].selected_followup_kind == null` (anchor), `response.links[1].selected_followup_kind == "narrow"`, `response.links[2].selected_followup_kind == "swap_template"`. + +### AC-12: `StudyChainLink.selected_followup_kind` null for legacy chains (FR-6 backward compatibility) + +- Given a 3-link chain created entirely under the legacy `narrow` strategy (no `config.auto_followup_selected_kind` key on any link) +- When `GET /api/v1/studies/{any_link_id}/chain` is called +- Then every `links[i].selected_followup_kind == null`. The existing chain-panel rendering tests continue to pass. + +### AC-13: Chain panel renders strategy badges (FR-7) + +- Given the chain endpoint returns the AC-11 payload +- When the auto-followup chain panel component mounts under `/studies/{link_id}` +- Then for link 2 a badge with `data-testid="chain-link-strategy-{link_2_id}"` reading `"narrow ↓"` renders; for link 3 a badge `"swapped to {template_short_name}"` renders. Link 1 has no badge. + +### AC-14: Chain panel preserved for legacy chains (FR-7 backward compatibility) + +- Given the chain endpoint returns a payload where every link has `selected_followup_kind = null` +- When the panel mounts +- Then no `chain-link-strategy-*` testid is present; the existing rendering matches today's snapshot.
+ +### AC-15: Tutorial section exists (FR-9) + +- Given the tutorial page is rendered +- When an operator reaches Step 12 ("Run the loop overnight") +- Then a sub-section explains the strategy choice and explicitly names the cycle guard + the narrow-fallback contract. + +### AC-16: Glossary key exists (FR-9) + +- Given the glossary file `ui/src/lib/glossary.ts` +- When the value-lock test asserts on `glossary['overnight_strategy']` +- Then the entry has `short` (≤120 chars) and `long` (paragraph) fields; `short` includes both wire values verbatim (`"narrow"` and `"follow_suggestions"`) so frontend mapping never drifts silently. + +### AC-17: Deleted swap target → defensive fallback (FR-3 + edge flow per cycle 1 finding C1-B4) + +- Given a parent study with `config.auto_followup_strategy = "follow_suggestions"` and a digest whose top executable follow-up is a `swap_template` pointing at `template_id = "TEMPLATE_DELETED"` which no longer exists in `query_templates` (e.g., template was hard-deleted between digest generation and autopilot dispatch) +- When `enqueue_followup_study` runs and reaches the defensive `repo.get_query_template(db, "TEMPLATE_DELETED")` lookup per FR-3 +- Then the worker logs a structlog WARN with `event_type = "auto_followup_swap_target_missing"`, `parent_study_id`, `swap_target_template_id = "TEMPLATE_DELETED"`. The worker falls back to the narrow path: child created with `template_id = parent.template_id`, narrowed search space, `child.config.auto_followup_selected_kind = "narrow_default"`. The chain does NOT 500 or SKIP; it continues with the same safety-net semantics as the empty-executable-candidates path. `auto_followup_no_executable_candidate_fell_back_to_narrow` is NOT emitted (the candidate existed but pointed at a deleted target — distinct telemetry shape), but the existing `auto_followup_enqueued` still fires. 
+ +### AC-18: Stale parent `selected_kind` does NOT leak to child (FR-3 inheritance + cycle 1 finding C1-B5) + +- Given a parent study with `config = {auto_followup_depth: 3, auto_followup_strategy: "follow_suggestions", auto_followup_selected_kind: "widen"}` (the parent was itself a chain-child whose selection was `"widen"`) and a digest whose top executable follow-up is a `swap_template` +- When `enqueue_followup_study` runs and creates the child +- Then `child.config.auto_followup_selected_kind == "swap_template"` (overwrites the parent's `"widen"`, NEVER inherits it). On the legacy path (parent's strategy is `None` or `"narrow"`), `child.config` MUST NOT contain `auto_followup_selected_kind` at all (the worker pops it out before persist, even if `parent.config` happened to carry one from a prior strategy). Integration test asserts this directly on the child row. + +## 13) Non-functional requirements + +- **Performance:** The strategy dispatch adds **at most one extra DB SELECT** per chain link (the `digests` row lookup keyed by `study_id`, which is UNIQUE-indexed). p99 < 50ms additional latency per child enqueue. The `select_executable_followup` function is O(N) over follow-up list length (max 5 per digest worker structured-output schema cap), trivially fast. +- **Reliability:** The new branches in `enqueue_followup_study` MUST be exception-safe: any unexpected error in digest read / parse / select MUST be caught and fall back to today's narrow path with a WARN log. Chain reliability MUST NOT regress vs the legacy path. +- **Operability:** Three new structlog event types (2 INFO + 1 WARN per FR-8). Runbook update (FR-9) explains how to grep for them and how to distinguish the routine fallback from the deleted-swap-target WARN. No new env vars, no new metrics, no new alerts. +- **Accessibility:** Strategy toggle MUST carry an `aria-label` mirroring its visual label, and the `InfoTooltip` MUST include `ariaLabel` (existing pattern). 
+ +## 14) Test strategy requirements (spec-level) + +- **Unit tests (`backend/tests/unit/`):** + - `domain/study/test_auto_followup_strategy.py` (new) — `select_executable_followup` matrix: empty list → None; text-only list → None; mixed text + narrow → narrow selected at first executable index; mixed text + swap_template (visited) + widen → widen selected (cycle-guard drop); swap_template to non-visited template → swap selected; multiple executable candidates → first-by-index wins. Pure-function tests only. + - `domain/study/test_auto_followup.py` — existing tests continue passing unchanged. +- **Integration tests (`backend/tests/integration/`):** + - `workers/test_auto_followup_strategy.py` (new) — DB-backed: seed parent + digest with each executable kind; assert child row's `template_id`, `config.auto_followup_strategy`, `config.auto_followup_visited_template_ids`, `config.auto_followup_selected_kind`. Cover fallback-to-narrow when digest has only text. Cover cycle-guard drop. Cover legacy `narrow` strategy producing byte-identical state to pre-feature. +- **Contract tests (`backend/tests/contract/`):** + - `test_studies_chain_contract.py` (extend) — assert `selected_followup_kind` optional field on `StudyChainLink`; assert `SELECTED_FOLLOWUP_KIND_VALUES` enum exposed via the domain module (CI source-of-truth grep gate per `verify_enum_source_of_truth.sh`). + - `test_studies_create_contract.py` (extend) — assert `AUTO_FOLLOWUP_STRATEGY_INVALID` (422) on pair-rule violation and value-rule violation; assert `auto_followup_strategy: "follow_suggestions"` round-trips through study create with `auto_followup_depth: 3`. +- **Vitest (UI unit/component) (`ui/src/__tests__/`):** + - `components/studies/create-study-modal.*.test.tsx` (extend) — toggle hidden when depth = 0; toggle visible with `"narrow"` default when depth ≥ 1; submit payload carries `auto_followup_strategy` (AC-4, AC-5). 
+ - `components/studies/auto-followup-chain-panel.test.tsx` (extend) — strategy badge per link; null link → no badge; mapping table (AC-13, AC-14). + - `lib/enums-overnight-strategy-discipline.test.ts` (new) — value-lock for `OVERNIGHT_STRATEGY_VALUES` (mirrors the existing `enums-convergence-discipline.test.ts` pattern at [`feat_study_convergence_indicator`](../../implemented_features/2026_05_31_feat_study_convergence_indicator/feature_spec.md)). + - `lib/glossary.test.ts` (extend) — value-lock for `overnight_strategy` glossary key (AC-16). +- **E2E (`ui/tests/e2e/`):** + - `overnight-strategy.spec.ts` (new) — seed via API helpers: anchor study (status=completed, depth=2, strategy=follow_suggestions) + completed digest whose `suggested_followups` lists a `swap_template` executable first, followed by a `narrow` executable (order matters — per D-5 the worker selects the first executable follow-up by index), persisted via the digest test-seeding helper. **Then explicitly enqueue `enqueue_followup_study` via the test harness's Arq pool helper** (per cycle 1 finding C1-B3 — directly seeding a digest does NOT trigger the digest-worker dispatch in tests, so the autopilot job would never run without explicit enqueue). Wait for the child row to land via a polling assertion on `repo.list_children_of_study(anchor.id)` (test helper). Assert child row has `config.auto_followup_selected_kind = "swap_template"` AND a different `template_id` than the anchor. Navigate to anchor's `/studies/{id}` page; assert chain panel renders the swap_template badge with `data-testid="chain-link-strategy-{child_id}"`. Per `CLAUDE.md` E2E rules — real backend, no `page.route()` mocking. + +## 15) Documentation update requirements + +- `docs/01_architecture/api-conventions.md` — add `AUTO_FOLLOWUP_STRATEGY_INVALID` to the error code table; mention `selected_followup_kind` as an additive field on `StudyChainLink`. +- `docs/01_architecture/data-model.md` — note the three new optional keys on `studies.config` (`auto_followup_strategy`, `auto_followup_visited_template_ids`, `auto_followup_selected_kind`).
No schema diagram changes (JSONB inner shape only). +- `docs/01_architecture/ui-architecture.md` — describe the strategy toggle's pair-validator visibility and the chain-panel badge. +- `docs/03_runbooks/agent-debugging.md` (or a new `overnight-strategy-debugging.md`) — operator-facing playbook for the three new structlog events. Specifically: when `auto_followup_no_executable_candidate_fell_back_to_narrow` fires frequently, the upstream signal is usually "study did not converge — digest is leading with text follow-ups." Recommended action: re-run with a larger trial budget (matches the convergence-indicator's `still_improving` recommendation). +- `docs/04_security/` — no change. +- `docs/05_quality/testing.md` — no change. +- `docs/08_guides/tutorial-first-study.md` — extend Step 12 per FR-9 with the strategy sub-section. + +## 16) Rollout and migration readiness + +- **Feature flags / staged rollout:** None. The strategy is opt-in by design — operators see today's behavior unless they explicitly pick `"follow_suggestions"`. No flag needed. +- **Migration/backfill expectations:** None — no schema change. Existing rows satisfy the new contract (absent `auto_followup_strategy` == today's narrow path). +- **Operational readiness gates:** standard CI (lint + typecheck + tests + coverage + smoke) plus the new value-lock vitest + the existing CI enum source-of-truth grep gate. +- **Release gate:** all AC-1 through AC-18 pass; legacy chain-panel tests + legacy auto-followup tests continue passing unmodified. 
+ +## 17) Traceability matrix + +| FR ID | Acceptance Criteria IDs | Planned stories/tasks | Test files/suites | Docs to update | +|---|---|---|---|---| +| FR-1 (config key + validator) | AC-1, AC-2 | Story 1 (backend schemas) | `test_studies_create_contract.py`, schema unit tests | `api-conventions.md` | +| FR-2 (wizard toggle) | AC-4, AC-5 | Story 2 (UI) | `create-study-modal.*.test.tsx`, `enums-overnight-strategy-discipline.test.ts` | `ui-architecture.md` | +| FR-3 (worker dispatch) | AC-3, AC-6, AC-7, AC-9, AC-10, AC-17, AC-18 | Story 3 (backend worker) | `workers/test_auto_followup_strategy.py` (integration — including deleted-swap-target AC-17 + stale-kind-leak AC-18 coverage), `test_auto_followup.py` (existing — unchanged) | `data-model.md` | +| FR-4 (`select_executable_followup`) | AC-6, AC-7, AC-8, AC-9 | Story 3 (backend domain) | `domain/study/test_auto_followup_strategy.py` | — | +| FR-5 (cycle guard state) | AC-7, AC-8, AC-10, AC-11, AC-18 | Story 3 (backend worker) | `workers/test_auto_followup_strategy.py` | `data-model.md` | +| FR-6 (`StudyChainLink` additive field) | AC-11, AC-12 | Story 4 (backend schemas + studies router) | `test_studies_chain_contract.py` | `api-conventions.md` | +| FR-7 (chain panel badges) | AC-13, AC-14 | Story 4 (UI) | `auto-followup-chain-panel.test.tsx`, `overnight-strategy.spec.ts` | — | +| FR-8 (telemetry) | AC-6, AC-8, AC-9, AC-17 | Story 3 (backend worker) | unit + integration assertions on log events (incl. `auto_followup_swap_target_missing` WARN) | runbook | +| FR-9 (glossary + tutorial) | AC-15, AC-16 | Story 5 (docs + UI) | `glossary.test.ts` | `tutorial-first-study.md` | + +## 18) Definition of feature done + +- [ ] All acceptance criteria (AC-1 through AC-18) pass in CI. +- [ ] Backend unit + integration + contract layers green. +- [ ] UI vitest + Playwright E2E green; existing `auto-followup-chain-panel.test.tsx` + `create-study-modal.*.test.tsx` cases still pass unmodified. +- [ ] Coverage gate ≥ 80% holds. 
+- [ ] Rollout gates from §16 satisfied (no schema change, no migration, no flag). +- [ ] `docs/01_architecture/api-conventions.md` + `data-model.md` + `ui-architecture.md` + `tutorial-first-study.md` updated. +- [ ] Phase 2 + Phase 3 deferred-work tracking files (`phase2_idea.md`, `phase3_idea.md`) exist alongside this spec. +- [ ] No open questions remain in §19. + +## 19) Open questions and decision log + +### Open questions + +- **OQ-1 (resolved at GPT-5.5 cycle 1, finding C1-B1)** — How does the chain-panel badge resolve the "short template name" for a `swap_template` link's display? **Resolved as D-11**: per-link `GET /api/v1/query-templates/{id}` fetch from the frontend (FR-7 updated). Rationale: at most 0–5 extra small fetches per chain, already TanStack-Query-cached client-side, keeps `/chain`'s response shape stable. +- **OQ-2 (resolved at GPT-5.5 cycle 2, finding C2-B3)** — Should the strategy toggle ALSO show as a read-only line on the study detail page? **Resolved as D-15**: deferred to Phase 2 (`phase2_idea.md`). The chain-panel badges per link (FR-7) already surface the strategy a chain link followed; an extra detail-page line would be a redundant secondary surface. If operator feedback during MVP2 says the chain panel is too far down the page to spot quickly, Phase 2 picks it up as part of the morning summary card scope. + +_No open questions remain — §18's "no open questions" gate is satisfied._ + +### Decision log + +- **D-1 (2026-06-03)** — Deliberately depart from `feat_overnight_autopilot`'s anti-pattern "do not modify `enqueue_followup_study`". Rationale: that anti-pattern guarded the parent spec's read-only/UI-only scope; this is a deliberate capability extension behind an opt-in toggle. The legacy `"narrow"` path is preserved byte-identically — every existing study and every operator who doesn't change anything keeps today's behavior. The departure is logged here so future readers don't mistake it for drift. 
+- **D-2 (2026-06-03)** — Strategy is **inherited verbatim** down the chain (idea Fork: locked). Rationale: mid-chain mode switching would break the cycle-guard contract — `visited_template_ids` would need conditional accumulation, which adds bug surface without clear operator value. Operators choose at create time. +- **D-3 (2026-06-03)** — On no executable candidate, **fall back to narrow** (idea Fork A: locked recommended default). Rationale: chain never stalls; depth budget is never wasted; the operator gets *some* exploration even when the digest is text-heavy. The fallback fires a distinct telemetry event so the operator can grep for "this chain didn't use the broader strategy at link N" without ambiguity. +- **D-4 (2026-06-03)** — Make `follow_suggestions` an **opt-in toggle**, not the new default (idea Fork C: locked recommended default). Rationale: the existing narrow loop works correctly and predictably; changing the default would surprise every existing operator. Opt-in lets new operators discover the broader behavior without breaking trust for current ones. +- **D-5 (2026-06-03)** — **Trust the digest's existing ordering**, not a kind-preference policy (idea Fork: trust digest order). Rationale: the digest's system prompt already encodes convergence-aware ordering ("lead with text re-run-with-bigger-budget when not converged; lead with narrow/widen when converged"). Re-ranking inside the autopilot would duplicate that logic AND require the autopilot to consume the convergence verdict — adding coupling without value. First-executable-by-index is the clean rule. +- **D-6 (2026-06-03)** — **No new follow-up kind.** The four-kind taxonomy is locked. Rationale: every new kind would change the digest's structured-output schema + the parse_followup_list contract + downstream consumers. Not justified by the cross-knob exploration goal. +- **D-7 (2026-06-03)** — **No new LLM call** in `enqueue_followup_study`. 
Rationale: the digest worker already paid the LLM cost and persisted structured output. The autopilot is a pure-DB-read consumer. Adding an LLM call here would re-introduce capability-check + budget-gate + retry surface that the digest already handles. +- **D-8 (2026-06-03)** — **No proposal `superseded` status in Phase 1.** Rationale: requires migration on the `proposals.status` CHECK constraint + a UX decision on whether superseded proposals appear in `/proposals`. Phase 1's `/chain` endpoint's `best_link_id` + `proposal_id_for_best_link` already give the operator a single morning artifact; Phase 3 polishes the rollup further when the friction is felt. +- **D-9 (2026-06-03)** — **Cycle guard is template-based, not search-space-based.** A re-narrowed visit to the *same* template with *different* bounds is allowed (the digest's `narrow`/`widen` on `parent.template_id` doesn't trigger the guard at all — only `swap_template` does, and only against the visited-template set). Rationale: bound-set comparison adds complexity for an attack the guard doesn't need to cover; the goal is preventing template ping-pong, not preventing legitimate re-narrows. +- **D-10 (2026-06-03)** — **Convergence-gated progression is provided by the existing `evaluate_chain_gate` SKIP_NO_LIFT branch — NOT a new stop reason.** When a chain link is `converged` per [`feat_study_convergence_indicator`](../../implemented_features/2026_05_31_feat_study_convergence_indicator/feature_spec.md) (trailing-window improvement ≤ epsilon `0.005`), its `best_metric − baseline_metric` is also ≤ epsilon by construction. The existing chain gate at [`backend/app/domain/study/auto_followup.py:127`](../../../../backend/app/domain/study/auto_followup.py#L127) already SKIPs with `no_lift` in that case, and the existing `/chain` endpoint already reports `stop_reason = "no_lift"`. 
Rationale: the chain naturally terminates at the converged link without modifying the gate; introducing a `"converged"` stop reason would duplicate the verdict the convergence indicator already surfaces per-link via FR-7 soft contract. The idea's Cap 2 goal is satisfied by composition, not by new infrastructure. (This also means the cross-model reviewer SHOULD NOT propose a new `"converged"` stop reason — it would be redundant with the existing `no_lift` value.) +- **D-11 (2026-06-03, GPT-5.5 cycle 1 finding C1-B1 accept)** — Frontend resolves the `swap_template` link's display name via a per-link `GET /api/v1/query-templates/{id}` fetch, NOT via a new `template_name` field on `StudyChainLink`. Rationale: at most 0–5 extra small fetches per chain (one per `swap_template`-badged link), already TanStack-Query-cached client-side, keeps `/chain`'s response shape stable, avoids forcing the backend chain-summary query to join `query_templates` for a value the frontend already loads in many adjacent contexts. +- **D-12 (2026-06-03, GPT-5.5 cycle 1 findings C1-A1 + C1-A5 accept)** — **Persistence contract for the new `studies.config` keys:** + - `auto_followup_selected_kind` is persisted ONLY when the worker took a selection-driven path (`"narrow"` / `"widen"` / `"swap_template"`) OR a `follow_suggestions` fallback path (`"narrow_default"`). The **legacy/default path** (strategy `None` / `"narrow"`) persists **no key** — the worker explicitly pops it before INSERT so a parent's lingering value never leaks to the child. This keeps the chain panel byte-identical for legacy chains. + - `auto_followup_visited_template_ids` is persisted ONLY under `"follow_suggestions"` strategy. The list is **ordered-unique** via `list(dict.fromkeys(...))`; appending a template equal to one already in the list is a no-op for the list contents. + - Rationale: a single, unambiguous contract everywhere (FR-3, FR-5, FR-6, AC-3, AC-6, AC-9, AC-12, AC-18 all reconcile). 
The earlier draft had FR-3 and FR-5/AC-12 contradicting each other on the legacy-path persistence — D-12 resolves in favor of the clean-legacy contract. +- **D-13 (2026-06-03, GPT-5.5 cycle 1 finding C1-A3 accept)** — `auto_followup_strategy` field type is `str | None` (NOT `Literal[...]`). The pair-and-value check happens in the `_validate_auto_followup_strategy` model_validator with the message-prefix path so the canonical `AUTO_FOLLOWUP_STRATEGY_INVALID` error code reaches the response envelope. Mirrors the existing `_validate_auto_followup_depth` pattern — a Pydantic `Literal[...]` at field-level would surface generic `VALIDATION_ERROR` for unknown values, violating §8.6's error-code contract. +- **D-14 (2026-06-03, GPT-5.5 cycle 1 finding C1-A4 accept)** — The wizard does NOT write `auto_followup_visited_template_ids`. The worker is the sole writer. The anchor's missing key is treated as `[anchor.template_id]` by the worker. The create-study contract test asserts a wizard-submitted `auto_followup_visited_template_ids` is 422-rejected. Rationale: single-writer rule eliminates the "two writers must agree on the seed value" coordination surface. +- **D-15 (2026-06-03, GPT-5.5 cycle 2 finding C2-B3 accept)** — Strategy read-only line on the study detail page (OQ-2) is deferred to Phase 2 (`phase2_idea.md`). The FR-7 per-link chain-panel badges are sufficient for MVP2; an extra detail-page line is redundant and would crowd the existing detail-page layout. Phase 2 picks it up if operator feedback says the chain panel is too far down to spot quickly during morning review. 
diff --git a/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/idea.md b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/idea.md new file mode 100644 index 00000000..47c54311 --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/idea.md @@ -0,0 +1,76 @@ +# Overnight → final solution (autonomous cross-knob tuning to one ship-ready config) + +**Date:** 2026-06-03 +**Status:** Idea — user request during a Q&A session about the overnight autopilot's reach +**Priority:** P1 +**Origin:** Operator stated goal: *"run the overnight process and in the morning have a final solution."* Surfaced while reviewing proposals at `/proposals/019e8e65-...` and learning that "Run overnight (compound automatically)" only narrows the anchor study's same knobs — it never branches to a different parameter or template, and it leaves a tree of per-link proposals rather than one answer. +**Depends on:** `feat_overnight_autopilot` (shipped 2026-05-31, PR #343) — this extends its chaining engine. + +> **Priority guidance:** P1 — explicit operator-requested capability with a clear product goal ("morning = a final solution"). Scoped, high-value, ready to execute. Not P0 only because the narrow-only loop works correctly today and nothing is on fire. + +## North-star goal + +The operator starts an overnight run and, in the morning, has **one final, ship-ready tuned configuration** — explored across the relevant knobs and templates, converged, and packaged as a **single proposal/PR** — without babysitting the chain or choosing among a tree of intermediate proposals. 
+ +## Problem + +Two gaps separate today's autopilot from that goal: + +**Gap 1 — Reach (it only narrows the same knobs).** The overnight loop is a deterministic narrowing chain: each link re-runs the *same template* with the *same knobs*, bounds narrowed ±50% around the prior winner ([`backend/workers/auto_followup.py:211-215`](../../../../backend/workers/auto_followup.py#L211-L215) `narrow_bounds_around_winner(..., bracket=0.5)`; [`auto_followup.py:238`](../../../../backend/workers/auto_followup.py#L238) hardcodes `template_id=parent.template_id`). It **never reads** `digest.suggested_followups`, so the `widen` / `swap_template` cards the digest already produces — the only path to a *different* knob set or template — are dead for automation. A "final" answer can't be reached if the loop only ever refines the one knob the operator started with. + +**Gap 2 — Rollup (no single answer).** Every completed study auto-creates its own `pending` proposal ([`backend/workers/orchestrator.py`](../../../../backend/workers/orchestrator.py) `_on_study_complete`). An overnight chain of up to 6 studies (anchor + depth 1–5) therefore yields **up to 6 proposals**. There is no surface that says "across the whole chain, *this* is the winning config." The operator still has to compare links by hand in the morning — the opposite of "a final solution." + +Delivering the goal requires closing **both** gaps: autonomous exploration across knobs/templates (so the result is actually complete), *and* a rollup that selects the global-best link and presents it as the one final proposal. + +## Proposed capabilities + +### Cap 1 — Autonomous cross-knob / cross-template exploration + +- On each chain link, after the parent's digest is generated, the autopilot reads `digest.suggested_followups` and may act on the top-ranked **executable** follow-up (`narrow` | `widen` | `swap_template` — never `text`, which carries `search_space: null`) instead of always synthesizing a ±50% narrow. 
+- A `swap_template` link creates the child against the **proposed template** (not `parent.template_id`) with the digest's remapped search space — this is what lets the chain move onto a different knob set. +- `widen` / `narrow` links run the broadened / tightened bounds the digest emitted. +- Fall back to today's deterministic ±50% narrow when a link has no executable follow-up (chain never stalls; see Fork A). +- Honor the digest's convergence-aware ordering ([`prompts/digest_narrative.system.md:99-121`](../../../../prompts/digest_narrative.system.md#L99-L121)): when the parent is `still_improving` / `too_few_trials` the digest already demotes `narrow`/`widen` and leads with "re-run with a larger budget" — the autopilot should follow that rather than narrowing a study that hasn't converged. + +### Cap 2 — Convergence-gated progression (so "final" means done, not just out of budget) + +- Progression continues while there is forward lift AND an executable follow-up worth exploring; it stops when the tail link is `converged` AND no remaining executable follow-up adds lift above the epsilon. +- Preserve every existing stop condition (`depth_exhausted`, `no_lift`, `budget`, `parent_failed`, `cancelled`, `in_flight`) and the 6-study cap (`_validate_auto_followup_depth`, `0 ≤ depth ≤ 5`). +- Cycle / no-regress guard: a `swap_template` → `swap_template` ping-pong, or a `widen` that undoes a prior `narrow`, must be prevented via a visited-set threaded through `config` (like `auto_followup_depth`) so the chain makes monotonic progress and terminates. + +### Cap 3 — Chain rollup → one final proposal (the morning artifact) + +- When the chain terminates, select the **global-best link** across the entire tree (best `primary_metric` in the objective direction, convergence-confirmed) and surface it as **the** recommended proposal. 
+- Mark the intermediate links' proposals as `superseded` (or secondary) so the operator sees one ship-ready answer, with the others available as the explored path/history (see Fork B). +- The final proposal's `metric_delta` should be expressed against the **original anchor baseline**, not just the immediate parent — "here's the total lift from where you started" is the number the operator ships on. + +### Cap 4 — Morning summary surface + +- A single view (ties into [`feat_overnight_studies_summary_card`](../feat_overnight_studies_summary_card/)) that shows: the final recommended config, total lift vs the anchor baseline, the convergence verdict, and the path the chain took (which knobs/templates it explored, link by link). +- The autopilot chain panel already surfaces per-link convergence verdicts (FR-7 soft contract) — extend it to mark the winning link and show each link's follow-up kind (narrow / widen / swap). + +## Scope signals + +- **Backend:** Core change in [`backend/workers/auto_followup.py`](../../../../backend/workers/auto_followup.py) — replace the unconditional narrow with follow-up selection that can branch `template_id` and consume a `search_space` straight from the parent's persisted digest (today the worker only loads the parent study + best trial; it must now also load `digest.suggested_followups`). Selection/ranking + global-winner rollup are natural new **pure domain functions** under `backend/app/domain/study/` (unit-testable). Rollup likely needs a repo query that walks the chain by `parent_study_id` and ranks links. Proposal-supersede is a new status transition on the proposals aggregate. +- **Frontend:** Morning summary card + chain-panel winner marker (Cap 4). Possibly a wizard mode/toggle if cross-knob chaining is opt-in (Fork C). +- **Migration:** Possibly a `superseded` value added to the proposal status CHECK/enum (Cap 3) — confirm at spec time. The chain/visited-set state lives in `studies.config` JSONB, no column needed. 
+- **Config:** Likely a new `config` key (e.g. `auto_followup_strategy: "narrow" | "follow_suggestions"`) alongside `auto_followup_depth`. No new env var expected. +- **Audit events:** N/A pre-MVP3 (no `audit_log` until Observable). Existing structlog chain events should gain the selected follow-up kind, source→target template ids (for swaps), and the winning-link selection. + +## Open forks to resolve at spec time + +- **Fork A — no executable follow-up on a link.** Fall back to today's ±50% narrow (chain never stalls) vs stop with a new `no_executable_followup` reason. **Recommended: fall back to narrow** so depth budget is never wasted; record the per-link strategy. +- **Fork B — fate of intermediate proposals.** Mark non-winning links `superseded` (clean single answer, needs a status value + migration) vs leave them `pending` and just *badge* the winner (no migration, more morning clutter). **Recommended: supersede** — it's what delivers "one final solution," and the explored path stays visible as history. +- **Fork C — default vs opt-in.** Make follow-up-aware chaining the new default for the overnight mode, an opt-in toggle, or a distinct wizard mode. **Recommended: opt-in toggle** (`auto_followup_strategy`) so the predictable narrow-only behavior remains available. +- **Fork D — "final" definition / budget vs completeness.** Cap at depth 1–5 as today (predictable cost, may stop before fully converged) vs "run until converged or budget-capped" (truer to "final," less predictable cost). **Recommended: keep the depth + daily-budget caps** and define "final" honestly as *best config found across what was explored, convergence-confirmed* — not a provable global optimum. + +## Honesty note on "final solution" + +"Final" here means **the best configuration found across the knobs/templates the chain explored overnight, with convergence confirmed** — bounded by which follow-ups the digest surfaced and the depth/daily-budget caps. 
It is not a proof of global optimality across the entire possible search space. The morning artifact should state this plainly (e.g. "best of N configs explored; converged") so the operator ships with calibrated confidence. + +## Relationship to other work + +- **Extends** [`feat_overnight_autopilot`](../../implemented_features/2026_05_31_feat_overnight_autopilot/) — directly modifies its chaining worker. +- **Depends-on / feeds** [`feat_overnight_studies_summary_card`](../feat_overnight_studies_summary_card/) (02_mvp2) — the morning summary surface; coordinate so the card renders the rolled-up winner. +- **Adjacent to** [`chore_auto_followup_parent_advisory_lock`](../chore_auto_followup_parent_advisory_lock/) (02_mvp2) — concurrency hardening on the same worker; land cleanly together. +- **Consumes** the existing follow-up taxonomy in [`backend/app/domain/study/followups.py`](../../../../backend/app/domain/study/followups.py) and digest generation in [`backend/workers/digest.py`](../../../../backend/workers/digest.py) — no new follow-up kinds; this teaches the autopilot to *act* on the kinds that already exist and to *roll them up* into one answer. 
diff --git a/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/implementation_plan.md b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/implementation_plan.md new file mode 100644 index 00000000..e90c06a2 --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/implementation_plan.md @@ -0,0 +1,648 @@ +# Implementation Plan — Overnight → final solution (autonomous cross-knob tuning) + +**Date:** 2026-06-03 +**Status:** Ready for Execution +**Primary spec:** [`feature_spec.md`](feature_spec.md) +**Policy source(s):** [`CLAUDE.md`](../../../../CLAUDE.md) (Absolute Rules), [`docs/01_architecture/api-conventions.md`](../../../../01_architecture/api-conventions.md) + +--- + +## 0) Planning principles + +- Spec traceability first: every story maps to FR IDs from `feature_spec.md` §17. +- **No migration in Phase 1** — all new state is JSONB keys on `studies.config`. Alembic head stays `0022_solr_engine_auth_check`. +- The legacy `"narrow"` path must stay behaviorally byte-identical (per P1-B2): a parent with NO `auto_followup_strategy` key produces a child with identical search-space + template_id + telemetry + NO new config keys. A parent with explicit `"narrow"` produces an identical child EXCEPT it inherits the `auto_followup_strategy: "narrow"` key (the one expected config delta) — still no selected/visited keys, no new telemetry. Backward-compatibility is a hard gate proven by `test_auto_followup.py` passing unmodified. +- Pure-domain selection logic (`select_executable_followup`) is unit-tested without fixtures; the worker dispatch is integration-tested DB-backed. +- The `/chain` endpoint, `StudyChainLink`, `StudyChainResponse`, and `chain_summary.py` all already exist (shipped by `feat_overnight_autopilot`). This plan EXTENDS them additively — it does not create them. 
+ +## 1) Scope traceability (FR → epics/stories) + +| FR ID | Epic / Story | Notes | +|---|---|---| +| FR-1 (config key + validator) | Epic 1 / Story 1.1 | `auto_followup_strategy: str \| None` + `_validate_auto_followup_strategy` + `AUTO_FOLLOWUP_STRATEGY_VALUES` constant | +| FR-2 (wizard toggle) | Epic 1 / Story 1.2 | Strategy ` wire values import from ui/src/lib/enums.ts *_VALUES arrays (form-select-discipline rule) +- Glossary entries carry short (≤120 char) + long; value-lock vitest asserts the shape +- All new module-level enum constants carry a // source-of-truth comment + are grepped by + scripts/ci/verify_enum_source_of_truth.sh +``` + +### AI Agent Execution Protocol + +0. Read `architecture.md` + `state.md` before Story 1.1. +1. Implement Epic 1 (schema + wizard) → Epic 2 (worker — the core) → Epic 3 (chain surface) → Epic 4 (docs). +2. Backend order within a story: domain → schemas → worker → router. +3. Run `make test-unit` + targeted `make test-integration` + `make test-contract` after each backend story; `cd ui && pnpm test` after each frontend story. +4. No migration round-trip needed (no schema change). +5. After the final story, update `state.md` + `architecture.md` + run `bash scripts/regen-generated-artifacts.sh` (the `selected_followup_kind` additive field changes the OpenAPI snapshot + `types.ts`). + +--- + +## Epic 1 — Strategy wire contract + wizard surface + +### Story 1.1 — `auto_followup_strategy` config key + validator +**Outcome:** The API accepts `config.auto_followup_strategy ∈ {"narrow","follow_suggestions"}` (or absent/null), 422-rejects bad values + pair-rule violations with `AUTO_FOLLOWUP_STRATEGY_INVALID`, and 422-rejects an operator-submitted `auto_followup_visited_template_ids` (single-writer rule per D-14). + +**New files** + +| File | Purpose | +|---|---| +| (none) | All changes are additive edits to existing files. 
| + +**Modified files** + +| File | Change | +|---|---| +| [`backend/app/api/v1/schemas.py`](../../../../backend/app/api/v1/schemas.py) | Add `auto_followup_strategy: str \| None = Field(default=None)` to `StudyConfigSpec` (after `auto_followup_depth` at line 716); add module-level `AUTO_FOLLOWUP_STRATEGY_VALUES: tuple[str, ...] = ("narrow", "follow_suggestions")`; add `_validate_auto_followup_strategy` `@model_validator(mode="after")` after `_validate_auto_followup_depth` (line 736); add a SEPARATE `@model_validator(mode="before")` rejecting an operator-submitted `auto_followup_visited_template_ids` (see Task 4 — `mode="before"` is REQUIRED because `StudyConfigSpec` defaults to `extra="ignore"`, which silently drops unknown keys before any `mode="after"` validator runs). | +| [`backend/app/api/errors.py`](../../../../backend/app/api/errors.py) | Add `"AUTO_FOLLOWUP_STRATEGY_INVALID"` to `_CUSTOM_ERROR_CODE_ALLOWLIST` (frozenset at lines 63-68). **This is required** — the prefix unwrap is NOT automatic; the allowlist is the authoritative whitelist (errors.py:58-60 comment: "adding a new code requires adding it here in the same PR that introduces the validator"). Without this, `AUTO_FOLLOWUP_STRATEGY_INVALID:` surfaces as a generic `VALIDATION_ERROR`, breaking AC-1/AC-2. | + +**Endpoints** + +| Method | Path | Request body | Success response | Error codes | +|---|---|---|---|---| +| `POST` | `/api/v1/studies` (existing) | `{..., config: {auto_followup_depth, auto_followup_strategy}}` | `201` (existing shape) | `AUTO_FOLLOWUP_STRATEGY_INVALID` (422) | + +**Key interfaces** + +```python +# backend/app/api/v1/schemas.py +AUTO_FOLLOWUP_STRATEGY_VALUES: tuple[str, ...] = ("narrow", "follow_suggestions") +# Source-of-truth for the frontend OVERNIGHT_STRATEGY_VALUES mirror. + +class StudyConfigSpec(BaseModel): + ... 
+ auto_followup_strategy: str | None = Field(default=None) + # str | None (NOT Literal) per spec D-13 — the canonical error-code unwrap + # requires the validator's message-prefix path. + + @model_validator(mode="after") + def _validate_auto_followup_strategy(self) -> "StudyConfigSpec": ... + # 1. None → return early. + # 2. value not in AUTO_FOLLOWUP_STRATEGY_VALUES → raise ValueError( + # "AUTO_FOLLOWUP_STRATEGY_INVALID: auto_followup_strategy must be 'narrow' " + # "or 'follow_suggestions'; got ''") + # 3. value set but auto_followup_depth in (None, 0) → raise ValueError( + # "AUTO_FOLLOWUP_STRATEGY_INVALID: auto_followup_strategy only applies when " + # "auto_followup_depth >= 1") +``` + +**Tasks** +1. Add the `AUTO_FOLLOWUP_STRATEGY_VALUES` constant + source-of-truth comment. +2. Add the `auto_followup_strategy` field to `StudyConfigSpec`. +3. Add `_validate_auto_followup_strategy` `@model_validator(mode="after")` (value-rule + pair-rule, both raising the `AUTO_FOLLOWUP_STRATEGY_INVALID:` prefix). +4. Add the `auto_followup_visited_template_ids` reject guard as a `@model_validator(mode="before")` (operator may not seed the cycle-guard list — single-writer rule per D-14). **`mode="before"` is required**: `StudyConfigSpec` has NO `model_config` today (Pydantic default `extra="ignore"`), so an unknown key is dropped before a `mode="after"` validator could see it. The before-validator inspects the raw dict: if `"auto_followup_visited_template_ids" in values` (or `auto_followup_selected_kind`), raise `ValueError("AUTO_FOLLOWUP_STRATEGY_INVALID: auto_followup_visited_template_ids is worker-managed and may not be set at study creation")`. **Do NOT add blanket `extra="forbid"`** — it risks rejecting the worker's own JSONB keys if a stored config is ever re-validated through `StudyConfigSpec`, and broadens the blast radius beyond the two worker-managed keys. +5. 
**Add `"AUTO_FOLLOWUP_STRATEGY_INVALID"` to `_CUSTOM_ERROR_CODE_ALLOWLIST` in `backend/app/api/errors.py`** (lines 63-68). This is mandatory — the prefix unwrap is gated by this allowlist, not automatic. Verify with the existing `AUTO_FOLLOWUP_DEPTH_OUT_OF_RANGE` entry as the pattern. + +**Definition of Done (DoD)** +- `make test-unit` green incl. new schema unit tests for the validator (value-rule, pair-rule, None-early-return). +- Contract test (`test_studies_create_contract.py`) asserts: (a) `auto_followup_strategy: "follow_suggestions"` + `auto_followup_depth: 3` round-trips 201 (AC-5 backend half); (b) `"follow_suggestions"` + no depth → 422 `AUTO_FOLLOWUP_STRATEGY_INVALID` (AC-1); (c) `"garbage"` + depth 3 → 422 `AUTO_FOLLOWUP_STRATEGY_INVALID` (AC-2); (d) operator-submitted `auto_followup_visited_template_ids` → 422 (D-14). +- `bash scripts/ci/verify_enum_source_of_truth.sh` passes for the new constant. + +### Story 1.2 — Wizard strategy toggle + `overnight_strategy` glossary key +**Outcome:** Step 5 of the create-study modal shows a Strategy `` after the depth selector block (after line ~1490, after the depth `` Strategy toggle** — label `"Strategy"` + `InfoTooltip glossaryKey="overnight_strategy"`; `data-testid="cs-overnight-strategy"`; options from `OVERNIGHT_STRATEGY_VALUES.map(...)` with display labels `"narrow"` → `"Refine the same knobs (predictable)"`, `"follow_suggestions"` → `"Try suggested follow-ups (broader exploration)"`; helper text per spec FR-2. Data source: form state. Interaction: writes `config.auto_followup_strategy` on submit. +- **Visibility:** rendered only when `values.auto_followup_depth >= 1` (mirror the FR-2 hint conditional at line 1443). + +**State dependency analysis** +``` +State added: auto_followup_strategy (form field, default "narrow") +Referenced by: + - create-study-modal submit handler (~line 728) — action: write to config only when depth >= 1 + - the new ` in `create-study-modal.tsx` | + +**Tasks** +1. 
Add `OVERNIGHT_STRATEGY_VALUES` to `enums.ts` + the discipline vitest. +2. Add the `overnight_strategy` glossary entry + the glossary value-lock assertion. +3. Add the Strategy `` (lines 1460-~1490, label `🌙 Run overnight (compound automatically)`, `data-testid="cs-auto-followup"`, `InfoTooltip glossaryKey="overnight_autopilot"`). **Insertion point for the Strategy toggle:** immediately after the depth ` — mirror the existing depth selector at create-study-modal.tsx:1460-1490. + Use the *_VALUES.map() form-select-discipline pattern (NOT inline ). */} +{values.auto_followup_depth !== undefined && values.auto_followup_depth >= 1 && ( +
+
+ + +
+ +

+ Refine: each follow-up tightens around the previous winner on the same knobs. + Try suggestions: each follow-up acts on the digest's top runnable recommendation, + which may switch knobs or templates. Refine is the safer default; Try suggestions explores broader. +

+
+)} +``` + +```tsx +{/* Per-link strategy badge — inside chain.links.map at auto-followup-chain-panel.tsx:191. + // Values must match backend/app/domain/study/auto_followup_strategy.py SELECTED_FOLLOWUP_KIND_VALUES */} +{link.selected_followup_kind && ( + + {link.selected_followup_kind === 'narrow_default' ? 'refined' + : link.selected_followup_kind === 'narrow' ? 'narrow ↓' + : link.selected_followup_kind === 'widen' ? 'widen ↑' + : `swapped to ${swapTemplateName ?? '…'}`} + +)} +``` + +### Layout and structure +- Strategy toggle: same `space-y-1.5` vertical rhythm as adjacent Step-5 controls; stacked below the depth selector. +- Badge: inline, trailing the link's metric, muted text weight so it doesn't compete with the name. + +### Information architecture placement +- Strategy toggle lives in Step 5 of the create-study modal, directly below the existing overnight depth selector — no new step, no new screen. +- Badge lives inline in the existing chain panel on `/studies/{id}` — no new surface. + +### Tooltips and contextual help +| Element | Glossary key | Source-of-truth comment | Pattern | +|---|---|---|---| +| Strategy `` and a new badge); no component is removed or rewritten. + +### Client-side persistence +Not applicable — no `localStorage`/`sessionStorage`. The strategy is form state submitted to the backend. + +--- + +## 3) Testing workstream + +### 3.1 Unit tests +- Location: `backend/tests/unit/` +- Tasks: + - [ ] `domain/study/test_auto_followup_strategy.py` (NEW) — `select_executable_followup` matrix (Story 2.1 DoD list). + - [ ] `api/` schema unit tests for `_validate_auto_followup_strategy` (Story 1.1) — value-rule, pair-rule, None-early-return. +- DoD: critical branches deterministic. 
+ +### 3.2 Integration tests +- Location: `backend/tests/integration/` +- Tasks: + - [ ] `backend/tests/integration/test_auto_followup_strategy.py` (NEW — flat path, matching the existing `test_auto_followup.py` convention; NOT under `integration/workers/`) — DB-backed worker dispatch: AC-3, AC-6, AC-7, AC-8 (worker-level), AC-9, AC-10, AC-17, AC-18 + exception-fallback + telemetry-event assertions. (Owned by Story 2.2 DoD.) + - [ ] `backend/tests/integration/test_studies_chain_api.py` (EXTEND) — `selected_followup_kind` + `template_id` population (AC-11, AC-12) + malformed-config coercion. (Owned by Story 3.1 DoD.) +- DoD: happy path + fallback + cycle-guard + deleted-swap-target + exception-fallback + legacy-parity covered. + +### 3.3 Contract tests +- Location: `backend/tests/contract/` +- Tasks: + - [ ] `test_studies_create_contract.py` (EXTEND) — `AUTO_FOLLOWUP_STRATEGY_INVALID` (AC-1, AC-2), round-trip (AC-5 half), visited-list reject (D-14). + - [ ] `test_studies_chain_contract.py` (EXTEND) — `selected_followup_kind` optional field + enum values (AC-11). +- DoD: the one new error code (`AUTO_FOLLOWUP_STRATEGY_INVALID`) has contract coverage. + +### 3.4 E2E tests +- Location: `ui/tests/e2e/` +- Tasks: + - [ ] `ui/tests/e2e/overnight-strategy.spec.ts` (NEW) — seed anchor (depth=2, strategy=follow_suggestions) + digest with swap_template + narrow executables via API helpers; **explicitly enqueue `enqueue_followup_study` via the test Arq helper** (cycle 1 finding C1-B3); poll `list_children_of_study` for the child; assert child `selected_followup_kind = "swap_template"` + different `template_id`; navigate to `/studies/{anchor}`; assert the swap_template badge renders. Real backend, no `page.route()`. **Owned by Story 3.2 DoD** (per P1-A4). +- DoD: tests use `page` for browser assertions; setup via `request`. 
+ +### 3.5 Existing test impact audit +| Test file | Pattern | Count | Action | +|---|---|---|---| +| `backend/tests/integration/test_auto_followup.py` | legacy narrow-path dispatch | ~existing | No change — legacy path is byte-identical; tests must stay green unmodified (the backward-compat gate). | +| `backend/tests/integration/test_studies_chain_api.py` | chain endpoint shape | ~existing | Extend with `selected_followup_kind` cases; existing assertions unchanged (additive field). | +| `backend/tests/contract/test_studies_chain_contract.py` | chain response schema | ~existing | Extend; existing assertions unchanged. | +| `ui/src/__tests__/components/studies/auto-followup-chain-panel.test.tsx` | panel rendering | ~existing | Extend with badge cases; existing cases unchanged. | +| `ui/src/__tests__/components/studies/create-study-modal.*.test.tsx` | wizard | ~existing | Extend with strategy-toggle cases; existing depth-selector assertions unchanged. | + +### 3.6 Migration verification +Not applicable — no schema change in Phase 1. Alembic head stays `0022_solr_engine_auth_check`. + +### 3.7 CI gates +- [ ] `make test-unit` +- [ ] `make test-integration` +- [ ] `make test-contract` +- [ ] `cd ui && pnpm test` +- [ ] `cd ui && pnpm lint && pnpm typecheck && pnpm build` +- [ ] `bash scripts/regen-generated-artifacts.sh` (clean tree — `selected_followup_kind` changes the OpenAPI snapshot) + +--- + +## 4) Documentation update workstream + +### 4.0 Core context files +- [ ] `state.md` — update Last-5-merges + current-branch context on merge (Epic 4 / finalization). +- [ ] `architecture.md` — note the autopilot's strategy-aware dispatch + the `selected_followup_kind` surface. +- [ ] `CLAUDE.md` — no new Absolute Rule; optionally note the `auto_followup_strategy` config key under Settings conventions if warranted. + +### 4.1 Architecture docs +- [ ] `api-conventions.md` (Story 4.1), `data-model.md` (Story 4.1), `ui-architecture.md` (Story 4.1). 
+ +### 4.3 Runbooks +- [ ] Autopilot strategy events runbook (Story 4.1). + +### 4.6 Guides +- [ ] `tutorial-first-study.md` Step 12 strategy sub-section (Story 4.1). + +**Documentation DoD** +- [ ] `state.md` + `architecture.md` consistent with shipped behavior. +- [ ] Docs/01 + /03 + /08 consistent with the contract. + +--- + +## 5) Lean refactor workstream + +### 5.1 Refactor goals +- None required — this is a purely additive feature. The legacy narrow path is preserved verbatim (the backward-compat gate forbids refactoring it). + +### 5.2 Planned refactor tasks +- [ ] None. Resist the temptation to "clean up" `enqueue_followup_study` while adding the dispatch — the byte-identical legacy-path requirement (AC-3) makes any refactor a regression risk. + +### 5.3 Refactor guardrails +- [ ] `test_auto_followup.py` passes unmodified — proof the legacy path is untouched. + +--- + +## 6) Dependencies, risks, and mitigations + +### Dependencies +| Dependency | Needed by | Status | Risk if missing | +|---|---|---|---| +| `feat_digest_executable_followups_swap_template` (persisted remap) | Story 2.2 | Implemented (PR #232) | High — without persisted remap the worker would need to re-remap. Locked. | +| `feat_overnight_autopilot` (`/chain` + `StudyChainLink` + panel) | Story 3.1, 3.2 | Implemented (PR #343) | N/A — shipped. | +| `parse_followup_list` defensive ingest | Story 2.2 | Implemented (PR #225) | N/A — shipped. | + +### Risks +| Risk | Likelihood | Impact | Mitigation | +|---|---|---|---| +| Refactoring the legacy worker path while inserting the dispatch breaks byte-identical behavior | M | H | `test_auto_followup.py` unmodified-pass gate; dispatch inserted as a discrete branch, not a rewrite. | +| `repo.get_digest_for_study` accessor name wrong in the plan | M | L | Story 2.2 Task 1 greps the repo layer to confirm the actual name before coding. 
| +| `StudyConfigSpec` not `extra="forbid"` → visited-list reject (D-14) needs a targeted guard | M | L | Story 1.1 Task 4 adds a targeted `@model_validator(mode="before")` guard covering only the two worker-managed keys; blanket `extra="forbid"` is explicitly rejected because it could break re-validation of stored configs that carry those keys. | + +### Failure mode catalog +| Failure mode | Trigger | Expected behavior | Recovery | +|---|---|---|---| +| Digest row missing under follow_suggestions | manual digest deletion mid-chain | WARN + fall back to narrow | auto (chain continues) | +| Swap target template deleted | template hard-deleted between digest + dispatch | `auto_followup_swap_target_missing` WARN + fall back to narrow | auto | +| Malformed `config.auto_followup_selected_kind` in DB | manual INSERT / schema drift | coerce to null + `chain_selected_kind_unknown` WARN; no 500 | auto | +| All executable candidates cycle-dropped | digest emits only swap_templates to visited templates | `selected=None` → fallback narrow; `dropped_template_ids` populated on the fallback event | auto | + +## 7) Sequencing and parallelization + +### Suggested sequence +1. Epic 1 Story 1.1 (schema — unblocks the wire contract). +2. Epic 2 Story 2.1 (pure selector — unblocks 2.2; parallelizable with 1.2). +3. Epic 2 Story 2.2 (worker dispatch — the core; depends on 1.1 + 2.1). +4. Epic 1 Story 1.2 (wizard — depends on 1.1's enum constant; parallelizable with Epic 2). +5. Epic 3 Story 3.1 (chain field — depends on 2.1's `SELECTED_FOLLOWUP_KIND_VALUES`). +6. Epic 3 Story 3.2 (panel badge — depends on 3.1). +7. Epic 4 Story 4.1 (docs — last). + +### Parallelization opportunities +- Story 2.1 (pure domain) + Story 1.2 (wizard) can run in parallel after 1.1. +- Story 3.1 can start once 2.1's enum constant lands (doesn't need 2.2). + +## 8) Rollout and cutover plan + +- **Rollout:** no flag, no migration. The strategy is opt-in by design — operators see today's behavior until they pick `"follow_suggestions"`. +- **Cutover:** none. Existing chains continue on the legacy path. 
+- **Reconciliation:** none — no external systems. + +## 9) Execution tracker + +### Current sprint +- [ ] Story 1.1 — config key + validator +- [ ] Story 1.2 — wizard toggle + glossary key +- [ ] Story 2.1 — pure-domain selector +- [ ] Story 2.2 — worker dispatch + cycle guard + telemetry +- [ ] Story 3.1 — `StudyChainLink.selected_followup_kind` + coercion +- [ ] Story 3.2 — chain-panel badge +- [ ] Story 4.1 — docs (tutorial + runbook + arch) + +### Blocked items +- None. + +### Done this sprint +- (none yet) + +## 10) Story-by-Story Verification Gate + +Per story: files match scope; the one new endpoint-affecting change (`POST /studies` accepting `auto_followup_strategy`) + the `/chain` additive field implemented exactly; key interfaces match; tests at every touched layer; `make test-unit` + targeted `make test-integration` + `make test-contract` + `cd ui && pnpm test` pass; no migration (verify Alembic head unchanged at `0022`); docs updated in the same PR when the contract changed. + +## 11) Plan consistency review + +1. **Endpoint count:** spec §8.1 lists 2 affected endpoints (`POST /studies` additive field, `GET /chain` additive field) — both covered (Story 1.1 + Story 3.1). No new endpoint. ✓ +2. **Error code coverage:** spec §8.6 lists 1 new code `AUTO_FOLLOWUP_STRATEGY_INVALID` — covered by Story 1.1 contract test (AC-1, AC-2). ✓ +3. **FR coverage:** all 9 FRs in §1 traceability table, each assigned to ≥1 story. ✓ +4. **Story internal consistency:** no new-file ownership conflicts (only `auto_followup_strategy.py` + 2 new test files are net-new; all else are edits). ✓ +5. **Test file assignment:** every test file assigned to a story's DoD (§3 inventory ↔ stories). ✓ +6. **Gate arithmetic:** no numeric gates beyond AC-1..18, all mapped in §17 of the spec. ✓ +7. **Open questions:** spec §19 OQ-1 + OQ-2 both resolved (D-11, D-15). ✓ +8. 
**Infra paths:** Alembic head `0022` verified (no migration); `auto_followup_strategy.py` path matches the `backend/app/domain/study/` layout; `studies.py` chain builder + `schemas.py` `StudyChainLink` verified to exist. ✓ +9. **Frontend plumbing:** `link.selected_followup_kind` flows from the `/chain` response (Story 3.1) to the panel (Story 3.2); `OVERNIGHT_STRATEGY_VALUES` flows from `enums.ts` to the modal. ✓ +10. **Enumerated value contracts:** two enumerated fields (`auto_followup_strategy`, `selected_followup_kind`) both have backend source-of-truth constants (`AUTO_FOLLOWUP_STRATEGY_VALUES`, `SELECTED_FOLLOWUP_KIND_VALUES`) + frontend mirrors + discipline tests. ✓ +11. **Audit-event coverage:** the autopilot's child-study creation is an existing mutation covered by `feat_auto_followup_studies`' obligations (currently N/A pre-MVP3 — no `audit_log` until MVP3). This feature adds no new `audit_log`-requiring mutation; the 3 new events are structlog-only. Explicitly justified. ✓ + +## 12) Definition of plan done + +- [x] Every FR mapped to stories/tasks/tests/docs. +- [x] Every story includes New/Modified files, (endpoints where applicable), key interfaces, tasks, DoD. +- [x] Test layers (unit/integration/contract/e2e) explicitly scoped + assigned. +- [x] Doc updates planned (Story 4.1 + finalization). +- [x] Lean refactor scope = none (additive feature; legacy path frozen). +- [x] Epic gates measurable (per-story DoD). +- [x] Story-by-Story Verification Gate included. +- [ ] Plan consistency review (§11) performed — pending GPT-5.5 cross-model pass. 
diff --git a/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/phase2_idea.md b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/phase2_idea.md new file mode 100644 index 00000000..2014d9b8 --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/phase2_idea.md @@ -0,0 +1,71 @@ +# Phase 2 — Morning summary card + study-detail strategy line + +**Date:** 2026-06-03 +**Status:** Idea — deferred Phase 2 from `feat_overnight_final_solution` Phase 1 spec +**Priority:** P2 +**Origin:** Carried out of `feat_overnight_final_solution/feature_spec.md` §3 "Phase boundaries" + §19 D-5/D-15. Phase 1 delivered cross-knob/cross-template autonomous exploration with the rollup data already available via the existing `/chain` endpoint; Phase 2 polishes the morning-review surface. +**Depends on:** `feat_overnight_final_solution` Phase 1 (this folder's `feature_spec.md`) must be merged first. + +> **Priority guidance:** P2 — UX polish. Not blocking the capability the Phase 1 spec delivers; lifts the morning-review experience from "open the study detail page → scroll to the chain panel" to "one card at the top says here's the answer." + +## Problem + +After Phase 1 ships, an operator who picks `follow_suggestions` overnight wakes up to: + +- a chain of up to 6 studies, each with its own auto-created `pending` proposal; +- the existing `/chain` endpoint that already rolls up `best_link_id` + `cumulative_lift` + `proposal_id_for_best_link` + `stop_reason`; +- the existing chain panel under `/studies/{id}` that surfaces the rolled-up summary and the new per-link strategy badges (FR-7). + +What's missing in Phase 1: a **dedicated morning surface** that says *"explored 4 strategies overnight, settled on swap_template to function-score-v1, +18% vs baseline, here's the PR to ship"* — in one card, surfacing the **path** (which knobs/templates were explored, in order) alongside the winner. 
The chain panel surfaces the data, but it's mid-page and panel-shaped; a top-of-page summary card would compress the morning review into a single glance. + +Also deferred from Phase 1: the **strategy read-only line on the study detail page** (Phase 1 OQ-2 / D-15) — an at-a-glance "this study is running under `follow_suggestions`" cue that lives on the detail page alongside the existing config summary. Defer rationale: redundant with the per-link badges in Phase 1's chain panel; revisit if operator feedback says the badges are too far down the page. + +## Proposed capabilities + +### Cap 1 — Top-of-page "Overnight result" card on `/studies/{id}` when the chain terminated + +- New card mounted above `LinkedEntitiesRow` on `/studies/{study_id}` when `chain.links.length >= 2` AND `chain.stop_reason in {"no_lift", "depth_exhausted", "budget", "parent_failed", "cancelled"}` (i.e., terminal chain). +- Content: + - Headline: *"Overnight exploration complete — {N} studies, +{X.YY}% lift"*. + - One-line path summary: *"Explored: {kind₁} → {kind₂} → {kind₃}"* using the per-link `selected_followup_kind` values from `StudyChainLink` (Phase 1 FR-6). + - Best config link → `/proposals/{proposal_id_for_best_link}` (already exists in `/chain` response). + - Total lift vs anchor baseline (already computed by `/chain`). + - Reason it stopped (the existing `stop_reason` mapped to a friendly phrase). + +### Cap 2 — `/studies` list "ran while away" badge + +- Coordinates with sibling [`feat_overnight_studies_summary_card`](../feat_overnight_studies_summary_card/idea.md) (already in 02_mvp2). The two ideas overlap — the morning card on the detail page (Cap 1 here) is the deep view; the index-page badge from the sibling idea is the discoverability cue. Resolve at Phase 2 spec time whether to fold them or coordinate them. 
+ +### Cap 3 — Strategy read-only line on the study detail page + +- When the local study has `config.auto_followup_strategy = "follow_suggestions"`, surface a one-line "Strategy: Try suggested follow-ups" badge in the existing study-detail config summary (above or beside `LinkedEntitiesRow`). +- For `"narrow"` or `None` (absent/null is treated as `"narrow"`, the default), surface nothing (or a subtle "Strategy: Refine same knobs" line, depending on UX call). + +### Cap 4 — Narrative summary in the morning card + +- Optional: a short natural-language paragraph in the card summarizing what the chain found. Could reuse the existing digest narrative of the winning link OR generate a chain-level narrative via a small LLM call. Defer the LLM-call decision to spec time — likely "no new LLM call, reuse the winning digest's narrative." + +## Scope signals + +- **Backend:** No new endpoint required. `/chain` already exposes `best_link_id`, `cumulative_lift`, `proposal_id_for_best_link`, `stop_reason`, and per-link `selected_followup_kind` (from Phase 1 FR-6). Cap 4's narrative reuse just reads `digests.narrative` for the best link. +- **Frontend:** New `OvernightResultCard` component (`ui/src/components/studies/overnight-result-card.tsx`) mounted on `/studies/{id}`. Cap 3 adds a line to the existing study-detail config summary. Both consume data already returned by existing endpoints. +- **Migration:** None. +- **Config:** None. +- **Audit events:** N/A pre-MVP3. + +## Why deferred from Phase 1 + +Phase 1's job was the **capability** — let the autopilot explore across knobs and templates autonomously. The data needed for the morning rollup card is already exposed by the existing `/chain` endpoint plus Phase 1's additive `selected_followup_kind` field. The card itself is a UX polish that can be designed once operators have used Phase 1 for a few cycles and we know what summary shape lands best. 
Shipping Phase 2 with Phase 1 would force UX decisions (card placement, copy, narrative source) before any operator has used the capability — a worse design loop than "ship the capability, observe usage, design the card." + +## Relationship to other work + +- **Builds on** [`feat_overnight_final_solution`](feature_spec.md) Phase 1 — depends on its `selected_followup_kind` field and the strategy persistence. +- **Coordinates with** [`feat_overnight_studies_summary_card`](../feat_overnight_studies_summary_card/idea.md) — index-page "ran while away" surface; resolve overlap at Phase 2 spec time. +- **Composes with** [`feat_study_convergence_indicator`](../../implemented_features/2026_05_31_feat_study_convergence_indicator/feature_spec.md) — the morning card may want to surface the winning link's convergence verdict too. + +## Open questions + +- **Q1** — Mount point: top of `/studies/{id}` (above all panels) vs a new tab? Recommend top-of-page banner card; tabs hide information. +- **Q2** — Card visibility predicate: every terminated chain, or only `follow_suggestions` chains? Recommend every terminated chain ≥ 2 links — the rollup is useful for narrow-only chains too. +- **Q3** — Fold Cap 3 (strategy line) into Cap 1 (card) or keep separate? Recommend keep separate — Cap 1 fires only on terminal chains, Cap 3 also helps mid-chain operators. +- **Q4** — Fold with `feat_overnight_studies_summary_card`? Spec-time decision. 
diff --git a/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/phase3_idea.md b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/phase3_idea.md new file mode 100644 index 00000000..1a67bdbb --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/phase3_idea.md @@ -0,0 +1,72 @@ +# Phase 3 — Proposal `superseded` status for non-winning chain links + +**Date:** 2026-06-03 +**Status:** Idea — deferred Phase 3 from `feat_overnight_final_solution` Phase 1 spec +**Priority:** Backlog +**Origin:** Carried out of `feat_overnight_final_solution/feature_spec.md` §3 "Phase boundaries" + §19 D-8. Phase 1 ships the cross-knob exploration capability and leans on `best_link_id` + `proposal_id_for_best_link` from the existing `/chain` endpoint to surface the morning artifact as a single proposal. Phase 3 polishes the `/proposals` index by marking non-winning chain links' proposals `superseded` so the morning view is unambiguously "one answer." +**Depends on:** `feat_overnight_final_solution` Phase 1 must be merged first. Independent of Phase 2 (the morning summary card). + +> **Priority guidance:** Backlog — defer-until-incident. The Phase 1 capability does not require this. File once an operator (or design partner) reports `/proposals` clutter as friction during morning review. + +## Problem + +When `follow_suggestions` runs a 4-link chain, today's proposal-creation logic ([`backend/workers/orchestrator.py`](../../../../backend/workers/orchestrator.py) `_on_study_complete`) creates **one `pending` proposal per completed link** — yielding up to 6 proposals (anchor + 5 descendants) in `/proposals`. Phase 1 surfaces a single "best" via `best_link_id` + `proposal_id_for_best_link` on `/chain`, but the index page still shows all 6 as `pending`. The non-winning links' proposals dead-end the operator: they're real `pending` rows but shipping any of them would discard the chain's winning insight. 
+ +Phase 3 marks those non-winning proposals `superseded` so: + +- the `/proposals` index can hide or visually de-emphasize them by default; +- the `pending` status accurately means "ready to ship, no better alternative known"; +- audit trails preserve the full chain history (superseded ≠ deleted). + +## Proposed capabilities + +### Cap 1 — Add `superseded` to the `proposals.status` CHECK constraint + +- **Migration:** alter the CHECK on `proposals.status` to `IN ('pending', 'pr_opened', 'pr_merged', 'rejected', 'superseded')`. Mirror in `Proposal.__table_args__` at [`backend/app/db/models/proposal.py:42`](../../../../backend/app/db/models/proposal.py#L42). +- New value semantics: a `pending` proposal that the system decided is dominated by a sibling chain link's proposal. Not operator-rejected; not auto-deleted; not shipped. +- Allowed state transitions: `pending → superseded` (auto by chain-rollup), `superseded → pending` (operator action via UI, when they explicitly want to ship the runner-up). + +### Cap 2 — Auto-supersede non-winning chain links' proposals on chain termination + +- On the chain-termination signal (definable as: the tail link reaches a terminal status AND `stop_reason ∈ {"depth_exhausted", "no_lift", "budget", "parent_failed", "cancelled"}`), run a service helper `mark_non_winning_chain_proposals_superseded(chain_anchor_id)`: + 1. Walk the chain via `parent_study_id`. + 2. Identify the `best_link_id` per the same rule as `/chain` endpoint (completed subset, direction-aware argmax/argmin, tie-break by `created_at ASC`). + 3. For every link OTHER than the best, find its `pending` proposal(s) (none if rejected); `UPDATE proposals SET status = 'superseded' WHERE id IN (...) AND status = 'pending'`. + 4. Idempotent — re-running the helper on the same chain produces zero updates. 
+- Trigger mechanism options: (a) extend `_on_study_complete` to walk the chain; (b) a new dedicated Arq job dispatched after the final link's digest; (c) periodic reconciler. Recommend (a) — same code path that already creates the per-link proposals. + +### Cap 3 — Frontend filtering on `/proposals` + +- Default filter excludes `superseded`. Operator can opt in via a "Show superseded" toggle. +- `StatusBadge` adds a `superseded` variant (greyed, "Superseded"). +- The chain panel (Phase 1 FR-7) MAY also surface the superseded marker per link so the operator sees the audit trail. + +## Scope signals + +- **Backend:** + - One Alembic migration: ALTER constraint on `proposals_status_check` (requires drop + re-add with new value list). Idempotent rollback adds the constraint back without `superseded`. + - New service helper `mark_non_winning_chain_proposals_superseded`. + - Service-state-machine guard updates at `backend/app/services/proposal_state.py` (if it exists) or wherever proposal transitions are gated. + - Repo helper `list_pending_proposals_for_chain(anchor_id)`. +- **Frontend:** `/proposals` index filter + status badge + per-link badge on chain panel. +- **Migration:** Yes — `proposals_status_check` CHECK constraint extension. Round-trip verified. +- **Config:** None. +- **Audit events:** MVP3+ — when `audit_log` lands, emit `proposal_superseded` event with `study_id`, `proposal_id`, `chain_anchor_id`, `best_link_id`. Pre-MVP3: structlog INFO only. + +## Why deferred from Phase 1 + +Phase 1's `/chain` endpoint already gives the operator a single morning artifact via `best_link_id` + `proposal_id_for_best_link`. The friction Phase 3 addresses (cluttered `/proposals` index) is real but downstream — operators who use `/chain`-derived links exclusively never see the clutter, and operators who do browse `/proposals` get a visual signal (the badge) only after the system has marked superseded, which itself depends on the chain-termination logic. 
+ +Critically: Phase 3 requires a migration that **reopens shipped schema** (the `proposals_status_check` CHECK constraint added in `feat_study_lifecycle`). That's a heavier change than Phase 1's all-JSONB additions. The Phase 1 cap-on-cap approach lets us ship the capability without the schema-extension surface; we can add Phase 3 once an operator reports the friction. + +## Relationship to other work + +- **Depends on** [`feat_overnight_final_solution`](feature_spec.md) Phase 1 — uses its chain-termination signal. +- **Adjacent to** [`feat_overnight_final_solution`](phase2_idea.md) Phase 2 — the morning card (Phase 2) may want to know which intermediate proposals are superseded for cleaner rendering. +- **Independent of** `feat_overnight_studies_summary_card` — different surface. + +## Open questions + +- **Q1** — When the `best_link_id` flips after chain termination (e.g., a delayed metric re-compute, or an operator re-runs the chain with a bigger budget): should the previously-superseded proposals flip back to `pending`? Recommend yes — but note the Cap 2 helper as specified only moves `pending → superseded`, so it must be extended to also restore the new winner's proposal (`superseded → pending`, today an operator-only transition per Cap 1) before a re-run reshuffles correctly; the helper stays idempotent either way. Edge case: a proposal already shipped to a PR (`pr_opened`/`pr_merged`) MUST NOT flip back to `pending`. Document the precedence rule. +- **Q2** — Operator UX for ship-the-runner-up: do we surface a "Reinstate this proposal" button on a superseded row, or require the operator to manually `PATCH status = pending`? Recommend the button — keeps the operator in the UI. +- **Q3** — Should `rejected` proposals from prior chain runs be preserved (not flipped to superseded)? Yes — rejection is a stronger operator signal than supersession. 
diff --git a/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/pipeline_status.md b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/pipeline_status.md new file mode 100644 index 00000000..ee66075c --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/pipeline_status.md @@ -0,0 +1,25 @@ +# Pipeline Status — feat_overnight_final_solution + +## Idea +- Status: Complete +- File: idea.md + +## Spec +- Status: Approved +- Date: 2026-06-03 +- File: feature_spec.md +- Cross-model review: GPT-5.5 passed (2 cycles to convergence; 0 High-severity findings at cycle 2) +- Cycle 1: 11 findings (6 High, 5 Medium, 0 Low) — all 11 accepted and applied +- Cycle 2: 6 findings (0 High, 5 Medium, 1 Low) — all 6 accepted and applied (internal-consistency cleanups from cycle 1 edits) +- Phases: 3 total (Phase 1 covered by this spec; Phase 2 + Phase 3 deferred with `phase2_idea.md` + `phase3_idea.md`) + +## Plan +- Status: Approved +- Date: 2026-06-03 +- File: implementation_plan.md +- Cross-model review: GPT-5.5 passed (2 cycles; cycle 1: 10 findings (5 High, 5 Medium) all accepted+applied; cycle 2: 0 findings — converged) +- Stories: 7 across 4 epics (Epic 1 schema+wizard, Epic 2 worker dispatch, Epic 3 chain surface, Epic 4 docs) +- Phases covered: Phase 1 (Phase 2 + 3 deferred via phase2_idea.md + phase3_idea.md) + +## Implementation +- Status: Not started diff --git a/docs/00_overview/planned_features/02_mvp2/feat_proposal_full_param_space_view/idea.md b/docs/00_overview/planned_features/02_mvp2/feat_proposal_full_param_space_view/idea.md new file mode 100644 index 00000000..4917c53c --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_proposal_full_param_space_view/idea.md @@ -0,0 +1,60 @@ +# Proposal page — show winning knobs in the context of the full available parameter space + +**Date:** 2026-06-03 +**Status:** Idea — user request during the same session as 
`feat_overnight_final_solution` +**Priority:** P2 +**Origin:** Operator question reviewing `/proposals/019e8f54-...`: *"when you see a proposal, you can see the knobs that were turned but it is still valuable information which knobs were not turned. Would it make sense to show the complete parameter space so that the user will see which knobs were turned within the context of all available knobs?"* +**Depends on:** None — uses existing schema only. + +> **Priority guidance:** P2 — UX clarity improvement, not blocking anything. Inexpensive (no schema change, no new data) but a real upgrade to how operators reason about a proposal in context. + +## Problem + +The proposal detail page surfaces `config_diff` — the subset of parameters the study **tuned** — and the winning values for them. Today's example proposal carries `{boost: {from: 1.0, to: 2.5}}` and reads as "we tuned title boost." What it does not show: which other knobs *exist* on the same template that the study **did not** tune. The operator is left to guess whether the optimizer considered description boost, fuzziness, function-score decay, etc. and rejected them, or whether those knobs were simply not in the study's search space. + +That gap matters because the proposal's own suggested follow-ups (`narrow` / `widen` / `swap_template`) frequently reference parameters that *weren't* in this study's search space — "Try varying `description.boost` next" reads disconnectedly without a visible reference list of "all knobs this template supports." Putting the tuned subset in the context of the full parameter space makes the follow-up cards self-explanatory and lets the operator reason about coverage at a glance. 
+ +## Proposed capabilities + +### Cap 1 — Render the template's full parameter space on the proposal page + +- On `/proposals/[id]`, add a panel (or extend the existing "Recommended config" panel) that lists **every parameter the template declares** — i.e., all of `query_templates.declared_params` for `proposal.template_id`. +- For each parameter, show one of three visual states: + - **Tuned (winning value)** — appears in `config_diff`. Show the winning value (and ideally the from→to delta from `config_diff`). + - **Tuned (default-value winner)** — the param was in the study's search space but the optimizer landed back at the parent's prior value (rare but possible). Show the prior value and a subtle "no change" marker. + - **Not in this study's search space** — declared on the template but the study didn't tune it. Show the parameter name + the template's default/type, with a "not tuned" marker (greyed, italic, or labeled). +- The grouping makes the proposal's tuned subset visually distinct from the un-tuned-but-available subset. + +### Cap 2 — Connect "not tuned" knobs to the follow-up cards + +- When a follow-up card references a parameter that appears in the "not tuned" group (e.g. `swap_template` whose target tunes `description_boost`, or a text suggestion saying "Try varying X next"), the parameter name in the un-tuned list should be clickable/anchored so the operator can see "this knob is the one the follow-up is pointing at." +- Light-weight UX: don't over-engineer the linkage. A shared `data-param-name` attribute the cards highlight on hover is enough. + +### Cap 3 — Optional: surface the same view on the study detail page + +- The study detail page already shows trial scatter / parameter-importance — those are the *internal* lens on what was tuned. The same "winning knobs in the context of the full template" view would be a complementary *outward* lens. +- Defer this until Cap 1 + Cap 2 prove out. 
Same data, different mount point; could live behind a tab. + +## Scope signals + +- **Backend:** No schema or data change required; at most one additive response field. `proposal.template_id` is already on the proposal; the proposal detail endpoint can include the template's `declared_params` in its response payload (one additional column read on the existing JOIN, or a separate `GET /api/v1/query-templates/{id}` call from the UI). Lightly favor including it in the proposal payload to keep the page round-trip count at 1. +- **Frontend:** New (or extended) panel on `/proposals/[id]`. Logic is pure set algebra on `declared_params.keys()` vs `config_diff.keys()` — no client-side computation beyond a `.map()`. Possibly extends `ui/src/components/proposals/`. +- **Migration:** None. +- **Config:** None. +- **Audit events:** N/A — read-only UI. + +## Why deferred / not yet prioritized + +The proposal page works correctly today — it just doesn't give the operator the full picture in one glance. The fix is small but not blocking any pipeline, incident, or daily cost. Captured here so it lands in the MVP2 polish wave alongside the other proposal-UI improvements rather than getting smuggled into an unrelated PR. + +## Relationship to other work + +- **Adjacent to** [`feat_overnight_final_solution`](../feat_overnight_final_solution/idea.md) (this session) — when the overnight chain can swap templates / tune different knobs automatically, the proposal page becomes *the* artifact operators read in the morning. Making it self-explanatory (this idea) compounds the value of the autonomous-exploration work. +- **Adjacent to** sibling proposal-UI ideas — none currently. +- **Consumes** existing data only: `proposals.template_id`, `proposals.config_diff`, `query_templates.declared_params`. No new tables, no migration. + +## Open questions (resolve at spec time) + +- **Q1** — Single panel or two stacked panels (tuned vs not-tuned)? Recommended: one panel with visual grouping; two panels feel heavyweight. 
+- **Q2** — Display the *type* (float / int / categorical) for un-tuned params, or just the name? Recommended: show type + the template's stated bounds/default — that's the most useful framing of "what could have been tuned." +- **Q3** — Should `swap_template` follow-ups also call out which params the swap target tunes that the current template doesn't (a third group)? Probably yes, but lower priority than Cap 1 + Cap 2. diff --git a/docs/01_architecture/api-conventions.md b/docs/01_architecture/api-conventions.md index 47a6af45..feed9b35 100644 --- a/docs/01_architecture/api-conventions.md +++ b/docs/01_architecture/api-conventions.md @@ -81,7 +81,15 @@ The studies endpoint surfaces two template-mismatch codes (added by `chore_creat | `JUDGMENT_TARGET_MISMATCH` | 422 | `judgment_list.target` does not equal the study's `target` on `POST /api/v1/studies` (added by `feat_study_target_judgment_mismatch_guard`, 2026-05-21). `retryable: false`. Fires AFTER the cluster check. Recovery: pick a judgment list authored against the study's target, or change the study's target. Catches the literal study2 incident — judgments authored on `e2e-target` paired with a study against `docs-articles` would otherwise burn the entire trial budget scoring 0.0 on every (params, query) pair. | | `INSUFFICIENT_JUDGMENT_OVERLAP` | 422 | `POST /api/v1/studies` create-time probe sampled up to `MAX_PROBED_DOCS=200` judged `doc_id`s from the first qid in the query set with any judgments (by `id ASC`); the count present in the study's target index was below `min(MIN_OVERLAP=3, max(judged_doc_count, 1))` (added by `feat_study_preflight_overlap_probe`, 2026-05-22). `retryable: false`. Recovery: regenerate judgments against the current index (most common cause: target index was rebuilt or `_reindex`'d with new doc IDs since the judgments were authored), or rebuild the index from the snapshot the judgments were authored on. Fires AFTER `JUDGMENT_TARGET_MISMATCH`. 
Probe is skipped (with WARN log `studies.preflight.overlap_probe.skipped`, `reason ∈ {unreachable, timeout, invalid_query_dsl}`) when the cluster is unreachable / probe times out / engine rejects the bare ids query — the orchestrator's per-trial failure handling catches those cases mid-flight. | -The studies endpoint also surfaces three new codes for the "Run this followup" lineage payload (added by `feat_digest_executable_followups`, 2026-05-24): +The studies endpoint also surfaces one error code for the overnight Strategy toggle (added by `feat_overnight_final_solution`, 2026-06-03): + +| Code | HTTP Status | Meaning | +|---|---|---| +| `AUTO_FOLLOWUP_STRATEGY_INVALID` | 422 | `POST /api/v1/studies` body carried one of: (a) an unknown `config.auto_followup_strategy` value (allowed: `"narrow"` or `"follow_suggestions"`); (b) `config.auto_followup_strategy` set without `config.auto_followup_depth >= 1` (pair-rule); or (c) an operator-submitted worker-managed key (`config.auto_followup_visited_template_ids` or `config.auto_followup_selected_kind`; both single-writer per D-14). `retryable: false`. Source-of-truth tuple: `AUTO_FOLLOWUP_STRATEGY_VALUES` at `backend/app/api/v1/schemas.py` (mirrored by `OVERNIGHT_STRATEGY_VALUES` in `ui/src/lib/enums.ts`). | + +The `/api/v1/studies/{id}/chain` endpoint's `StudyChainLink` response model gained two additive fields in the same feature (no new endpoints): `template_id: str` (needed by the chain panel's `swap_template` badge to resolve the target template's display name via `GET /api/v1/query-templates/{id}`) and `selected_followup_kind: Literal["narrow_default","narrow","widen","swap_template"] | None` (the path the autopilot took for each link; null for anchors + legacy/`"narrow"` strategy chains per D-12). Existing clients ignore both — backward-compatible. 
+ +The studies endpoint also surfaces three codes for the "Run this followup" lineage payload (added by `feat_digest_executable_followups`, 2026-05-24): | Code | HTTP Status | Meaning | |---|---|---| diff --git a/docs/01_architecture/data-model.md b/docs/01_architecture/data-model.md index 4f6862bb..6c44d5b2 100644 --- a/docs/01_architecture/data-model.md +++ b/docs/01_architecture/data-model.md @@ -218,7 +218,7 @@ CREATE TABLE studies ( judgment_list_id UUID NOT NULL REFERENCES judgment_lists(id), search_space JSONB NOT NULL, -- per-parameter range/choice spec objective JSONB NOT NULL, -- {metric, k, direction} - config JSONB NOT NULL, -- {max_trials, time_budget_min, parallelism, sampler, pruner, seed, trial_timeout_s} + config JSONB NOT NULL, -- {max_trials, time_budget_min, parallelism, sampler, pruner, seed, trial_timeout_s, auto_followup_depth, auto_followup_strategy, auto_followup_visited_template_ids (worker-managed), auto_followup_selected_kind (worker-managed)} — last three keys added by feat_overnight_final_solution (2026-06-03); see "Studies config keys" note below status TEXT NOT NULL CHECK (status IN ('queued', 'running', 'completed', 'cancelled', 'failed')), failed_reason TEXT, -- populated when status='failed' optuna_study_name TEXT NOT NULL UNIQUE, -- convention: optuna_study_name = str(studies.id) @@ -234,6 +234,24 @@ CREATE TABLE studies ( completed_at TIMESTAMPTZ ); +-- Studies config keys (no schema change; all keys are JSONB inner shape). +-- feat_overnight_final_solution (2026-06-03) added three optional keys: +-- * auto_followup_strategy — operator-facing wire field, "narrow" | "follow_suggestions" | absent. +-- Validated by StudyConfigSpec._validate_auto_followup_strategy via the +-- AUTO_FOLLOWUP_STRATEGY_INVALID error-code prefix (D-13). Default (absent or +-- "narrow") is byte-identical to pre-feature behavior. +-- * auto_followup_visited_template_ids — worker-managed cycle-guard list, +-- ordered-unique. 
Persisted ONLY by the autopilot worker under +-- "follow_suggestions" strategy (D-12); the wizard 422-rejects operator- +-- submitted values (single-writer rule per D-14). +-- * auto_followup_selected_kind — per-link audit field; one of +-- "narrow_default" | "narrow" | "widen" | "swap_template" or absent. +-- Persisted ONLY by the autopilot worker under "follow_suggestions"; the +-- legacy/default narrow path persists no key at all (D-12). Surfaced as +-- StudyChainLink.selected_followup_kind on the /chain endpoint with a +-- defensive coercion against unknown values (chain_selected_kind_unknown +-- WARN; never raises ValidationError that would 500 the endpoint). + CREATE TABLE trials ( id UUID PRIMARY KEY, study_id UUID NOT NULL REFERENCES studies(id) ON DELETE CASCADE, diff --git a/docs/01_architecture/ui-architecture.md b/docs/01_architecture/ui-architecture.md index 585391ef..3a5beb5e 100644 --- a/docs/01_architecture/ui-architecture.md +++ b/docs/01_architecture/ui-architecture.md @@ -38,7 +38,7 @@ Per umbrella spec §22, MVP1 ships these top-level routes: | `/templates` | Templates list | `feat_studies_ui` | | `/templates/{id}` | Template editor | `feat_studies_ui` | | `/studies` | Studies list. Columns: name, cluster, status, best_metric (with `Pinned at metric ceiling` badge for `>=0.99` on `maximize` studies), `Trials` (non-baseline count), `Convergence` (badge — `Converged`/`Improving`/`Too few trials`/em-dash), created_at, completed_at. Trials + Convergence columns added by `feat_studies_convergence_visibility` Epic 1 (2026-06-02) — backend computes them via `count_trials_for_studies` + `resolve_list_convergence_verdicts` (bounded to 1–2 queries per page; FR-3). The Convergence badge reuses `CONVERGENCE_VERDICT_VALUES` (`ui/src/lib/enums.ts`) for source-of-truth discipline and the `convergence_verdict` glossary key for the tooltip — same taxonomy as the `` on the detail page. 
| `feat_studies_ui` | -| `/studies/{id}` | Study detail (live trial table + digest; the `AutoFollowupChainPanel` renders a rolled-up **Overnight chain** summary — ordered links, cumulative lift, best-config, stop reason — fed by `useStudyChain` against `GET /studies/{id}/chain`. Refetch contract per `feat_overnight_autopilot` D-10; render predicate D-13; best-config 3-branch D-11. The `ConvergencePanel` mounts between `ConfidencePanel` and the trials table — verdict badge + best-so-far Recharts curve fed by `StudyDetail.convergence`, with three null-state branches (still_running / not_enough_trials / unavailable) per `feat_study_convergence_indicator` AC-13/13b/13c. The `ConvergenceVerdict` Literal flows via the FR-7 soft contract to the autopilot chain panel's per-link summary — the autopilot PR consumes the type symbol; AC-16 lives in the autopilot CI lane) | `feat_studies_ui` | +| `/studies/{id}` | Study detail (live trial table + digest; the `AutoFollowupChainPanel` renders a rolled-up **Overnight chain** summary — ordered links, cumulative lift, best-config, stop reason — fed by `useStudyChain` against `GET /studies/{id}/chain`. Refetch contract per `feat_overnight_autopilot` D-10; render predicate D-13; best-config 3-branch D-11. **Per-link Strategy badge** added by `feat_overnight_final_solution` Story 3.2 (`feat_overnight_final_solution` FR-7) — a compact `narrow ↓` / `widen ↑` / `swapped to {short_template_name}` / `refined` label per link, sourced from `StudyChainLink.selected_followup_kind` (additive optional field with defensive coercion at chain-summary construction so unknown JSONB values become `null` + a `chain_selected_kind_unknown` WARN, never a 500). The swap_template badge resolves the target's display name via a per-link `useTemplate(link.template_id)` fetch (per OQ-1 / D-11). 
The `ConvergencePanel` mounts between `ConfidencePanel` and the trials table — verdict badge + best-so-far Recharts curve fed by `StudyDetail.convergence`, with three null-state branches (still_running / not_enough_trials / unavailable) per `feat_study_convergence_indicator` AC-13/13b/13c. The `ConvergenceVerdict` Literal flows via the FR-7 soft contract to the autopilot chain panel's per-link summary — the autopilot PR consumes the type symbol; AC-16 lives in the autopilot CI lane) | `feat_studies_ui` | | `/proposals` | Proposals list | `feat_proposals_ui` | | `/proposals/{id}` | Proposal detail (config diff + metric delta + PR link) | `feat_proposals_ui` | diff --git a/docs/03_runbooks/auto-followup-debugging.md b/docs/03_runbooks/auto-followup-debugging.md index 6b7e30ac..a03e0408 100644 --- a/docs/03_runbooks/auto-followup-debugging.md +++ b/docs/03_runbooks/auto-followup-debugging.md @@ -22,7 +22,21 @@ Every chain enqueue / skip / cancel branch emits a distinct `event_type` so a si | 7 | `auto_followup_enqueued_duplicate_dropped` | worker (layer-2 backstop) | Worker found existing children via `list_children_of_study` and refused to create a second — fires only on Arq `_job_id` dedup miss | | 8 | `auto_followup_cancelled_with_parent` | cascade service | Direct child got cancelled as part of `cancel_study_with_chain_cascade` | -Plus 4 auxiliary events (intentionally outside the FR-9 catalog per cycle-1 C1-5 + cycle-2 C2-3 — they're warning paths, not chain-state events): +Plus 3 events added by `feat_overnight_final_solution` Story 2.2 (only emitted under the `auto_followup_strategy = "follow_suggestions"` path — the legacy/missing/`"narrow"` path stays log-quiet): + +| Event | Where | When | +|---|---|---| +| `auto_followup_strategy_selected` | worker (post-INSERT) | The worker took a selection-driven path (narrow / widen / swap_template). 
Fields: `parent_study_id`, `child_study_id`, `strategy: "follow_suggestions"`, `selected_kind`, `source_index`, `candidate_count`, `dropped_template_ids`. The `dropped_template_ids` field carries cycle-guard activity on the same line — a non-empty list with `selected_kind = "narrow"` or `"widen"` means the chain wanted to swap to a visited template but the guard fired. | +| `auto_followup_no_executable_candidate_fell_back_to_narrow` | worker (post-INSERT) | `select_executable_followup` returned no candidate (digest had only `text` items, OR every executable was a swap to a visited template). The chain did NOT stall — fell back to today's narrow path. Frequent firing usually means the digest is text-heavy (typical of `still_improving` / `too_few_trials` parent verdicts); the operator should re-run with a larger trial budget rather than continue chaining. Fields: `parent_study_id`, `child_study_id`, `digest_followup_kinds`, `visited_template_id_count`, `dropped_template_ids`. | +| `auto_followup_swap_target_missing` | worker (pre-fallback WARN) | A `swap_template` follow-up pointed at a template that no longer exists (hard-deleted between digest persist and dispatch). Logged BEFORE the fallback decision so `child_study_id` is NOT populated (the fallback child gets created next). Operator action: investigate why a template was deleted while a chain referenced it. Fields: `parent_study_id`, `swap_target_template_id`. | + +Plus 1 auxiliary error event from the same Story 2.2 defensive try/except: + +| Event | Where | When | +|---|---|---| +| `auto_followup_strategy_dispatch_error` | worker (pre-fallback WARN) | An unexpected exception fired inside the `follow_suggestions` dispatch block (digest read / parse / select). The chain falls back to the narrow path; reliability does not regress vs the legacy path. Fields: `parent_study_id`, `error` (truncated to 200 chars). 
| + +Plus 4 long-standing auxiliary events (intentionally outside the FR-9 catalog per cycle-1 C1-5 + cycle-2 C2-3 — they're warning paths, not chain-state events): | Event | Where | When | |---|---|---| diff --git a/docs/08_guides/tutorial-first-study.md b/docs/08_guides/tutorial-first-study.md index c7d45386..366b7d40 100644 --- a/docs/08_guides/tutorial-first-study.md +++ b/docs/08_guides/tutorial-first-study.md @@ -447,14 +447,37 @@ deterministically, and stops on its own when the lift plateaus. 1. Open the **Create study** wizard. Pick the **Deep (1000)** preset. 2. Set **🌙 Run overnight (compound automatically)** to **depth 3**. -3. Click **Create study** before you log off. -4. In the morning, open the study detail page. The **Overnight chain** +3. Pick a **Strategy** (see below). +4. Click **Create study** before you log off. +5. In the morning, open the study detail page. The **Overnight chain** panel summarises what ran, the cumulative lift across the chain, which link won, and why the chain stopped. -5. The summary points at a proposal — click it, review the diff, open the +6. The summary points at a proposal — click it, review the diff, open the PR. (You can also cancel any mid-chain study with `?cascade=true` (the default) to halt pending children.) +### Strategy — Refine vs. Try suggestions + +The new **Strategy** toggle (visible only after depth ≥ 1 is selected) +picks how each follow-up is chosen: + +- **Refine the same knobs (predictable)** — the safer default. Each + follow-up tightens the search space around the previous winner *on the + same template*. The chain hill-climbs one set of knobs deterministically. + Use this when you trust the template + the parameters you're tuning and + you just want better numbers on them. +- **Try suggested follow-ups (broader exploration)** — each follow-up + acts on the parent digest's top runnable recommendation, which may + *widen* the bounds OR *swap* the template (e.g. 
from `multi-match` to + `function-score-decay`). A cycle guard prevents the chain from + ping-ponging between two templates. When the digest has no runnable + suggestion, the chain falls back to today's narrow behavior so it + never stalls. + +You'll see what each link did on the chain panel: a small `narrow ↓` / +`widen ↑` / `swapped to {template_name}` / `refined` badge next to each +study tells you the path the autopilot took. + **RelyLoop runs the exploration overnight unattended, but it never opens a PR on your behalf. The chain ends with a proposal you review and merge — your one decision.** diff --git a/ui/openapi.json b/ui/openapi.json index 381f00ac..d6f884c0 100644 --- a/ui/openapi.json +++ b/ui/openapi.json @@ -1 +1 @@ -{"components":{"schemas":{"BulkQueriesResponse":{"description":"``POST /api/v1/query-sets/{id}/queries`` response.","properties":{"added":{"title":"Added","type":"integer"}},"required":["added"],"title":"BulkQueriesResponse","type":"object"},"CIShape":{"description":"Bootstrap percentile CI on the winner's per-query metric values.","properties":{"high":{"title":"High","type":"number"},"low":{"title":"Low","type":"number"},"method":{"const":"bootstrap_n1000","title":"Method","type":"string"},"n_samples":{"title":"N Samples","type":"integer"}},"required":["low","high","method","n_samples"],"title":"CIShape","type":"object"},"CalibrationResponse":{"description":"Calibration endpoint response.\n\nMirrors :class:`backend.app.eval.calibration.CalibrationResult` —\npersisted as ``judgment_lists.calibration`` JSONB.","properties":{"cohens_kappa":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Cohens Kappa"},"n_samples":{"title":"N Samples","type":"integer"},"per_class":{"additionalProperties":{"type":"number"},"title":"Per Class","type":"object"},"warning":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Warning"},"weighted_kappa":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weighted 
Kappa"}},"required":["cohens_kappa","weighted_kappa","per_class","n_samples","warning"],"title":"CalibrationResponse","type":"object"},"CalibrationSample":{"description":"One row in :class:`CalibrationSamplesRequest`.","properties":{"doc_id":{"maxLength":512,"minLength":1,"title":"Doc Id","type":"string"},"query_id":{"maxLength":36,"minLength":1,"title":"Query Id","type":"string"},"rating":{"enum":[0,1,2,3],"title":"Rating","type":"integer"}},"required":["query_id","doc_id","rating"],"title":"CalibrationSample","type":"object"},"CalibrationSamplesRequest":{"description":"Body for ``POST /api/v1/judgment-lists/{id}/calibration`` (Story 3.5).","properties":{"human_samples":{"items":{"$ref":"#/components/schemas/CalibrationSample"},"minItems":1,"title":"Human Samples","type":"array"}},"required":["human_samples"],"title":"CalibrationSamplesRequest","type":"object"},"CategoricalParam":{"additionalProperties":false,"description":"Discrete choice parameter.\n\nOptuna ``suggest_categorical`` handles strings, ints, floats, and bools\nas choices.","properties":{"choices":{"items":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"},{"type":"boolean"}]},"minItems":1,"title":"Choices","type":"array"},"type":{"const":"categorical","title":"Type","type":"string"}},"required":["type","choices"],"title":"CategoricalParam","type":"object"},"ClusterAggregateHealth":{"description":"Aggregate counts for the ``elasticsearch_clusters`` /healthz field (Story 3.5).\n\nPer spec §2: probes only the *registered* user clusters (from the DB),\nNOT the local Compose ES/OpenSearch — those have their own subsystem\nfields. 
``status`` is a count derived from the cached ``cluster:health:*``\nentries; missing-cache or red/unreachable clusters are counted as\n``unreachable``.","properties":{"healthy":{"title":"Healthy","type":"integer"},"registered":{"title":"Registered","type":"integer"},"unreachable":{"title":"Unreachable","type":"integer"}},"required":["registered","healthy","unreachable"],"title":"ClusterAggregateHealth","type":"object"},"ClusterDetail":{"description":"``GET /api/v1/clusters/{id}`` response.","properties":{"auth_kind":{"enum":["es_apikey","es_basic","opensearch_basic","opensearch_sigv4","solr_basic","solr_apikey"],"title":"Auth Kind","type":"string"},"base_url":{"title":"Base Url","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"engine_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Config"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"environment":{"enum":["prod","staging","dev"],"title":"Environment","type":"string"},"health_check":{"$ref":"#/components/schemas/HealthCheckResult"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"notes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Notes"},"target_filter":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Target Filter"}},"required":["id","name","engine_type","environment","base_url","auth_kind","created_at","health_check"],"title":"ClusterDetail","type":"object"},"ClusterListResponse":{"description":"Paginated list response.","properties":{"data":{"items":{"$ref":"#/components/schemas/ClusterSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ClusterListResponse","type":"object"},"ClusterSummary":{"description":"List-view; drops engine_config + notes for 
brevity.","properties":{"auth_kind":{"enum":["es_apikey","es_basic","opensearch_basic","opensearch_sigv4","solr_basic","solr_apikey"],"title":"Auth Kind","type":"string"},"base_url":{"title":"Base Url","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"environment":{"enum":["prod","staging","dev"],"title":"Environment","type":"string"},"health_check":{"$ref":"#/components/schemas/HealthCheckResult"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"target_filter":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Target Filter"}},"required":["id","name","engine_type","environment","base_url","auth_kind","created_at","health_check"],"title":"ClusterSummary","type":"object"},"ConfidenceShape":{"description":"The top-level shape exposed via ``StudyDetail.confidence``.\n\nEvery sub-field is independently nullable per FR-7 — degraded paths\nsuppress only the sub-fields they affect, never the whole shape (the\norchestrator returns whole-object ``None`` only when the winner trial\nrow itself is missing).","properties":{"ci_95":{"anyOf":[{"$ref":"#/components/schemas/CIShape"},{"type":"null"}]},"convergence":{"anyOf":[{"$ref":"#/components/schemas/ConvergenceShape"},{"type":"null"}]},"headline":{"$ref":"#/components/schemas/HeadlineShape"},"late_trial_stddev":{"anyOf":[{"$ref":"#/components/schemas/LateTrialStddevShape"},{"type":"null"}]},"per_query_outcomes":{"anyOf":[{"$ref":"#/components/schemas/PerQueryOutcomesShape"},{"type":"null"}]},"runner_up_gap":{"anyOf":[{"$ref":"#/components/schemas/RunnerUpGapShape"},{"type":"null"}]}},"required":["headline","ci_95","runner_up_gap","late_trial_stddev","convergence","per_query_outcomes"],"title":"ConfidenceShape","type":"object"},"ConfigRepoDetail":{"description":"``GET /api/v1/config-repos/{id}`` response + ``POST`` 201 
body.","properties":{"auth_ref":{"title":"Auth Ref","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"default_branch":{"title":"Default Branch","type":"string"},"id":{"title":"Id","type":"string"},"last_merged_proposal":{"anyOf":[{"$ref":"#/components/schemas/ProposalSummary"},{"type":"null"}]},"name":{"title":"Name","type":"string"},"pr_base_branch":{"title":"Pr Base Branch","type":"string"},"provider":{"const":"github","title":"Provider","type":"string"},"repo_url":{"title":"Repo Url","type":"string"},"webhook_registration_error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Webhook Registration Error"},"webhook_secret_ref":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Webhook Secret Ref"}},"required":["id","name","provider","repo_url","default_branch","pr_base_branch","auth_ref","webhook_secret_ref","webhook_registration_error","created_at"],"title":"ConfigRepoDetail","type":"object"},"ConfigReposListResponse":{"description":"``GET /api/v1/config-repos`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/ConfigRepoDetail"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ConfigReposListResponse","type":"object"},"ConnectionTestRequest":{"description":"Body for ``POST /api/v1/clusters/test-connection`` (infra_adapter_solr Story A9).\n\nSame shape as ``CreateClusterRequest`` minus the persisted-only fields\n(``name``, ``environment``, ``notes``, ``target_filter``). 
``engine_type``\n+ ``auth_kind`` are typed as ``str`` (not Literal) so a bad value yields\nthe project-standard 400 envelope rather than a raw 422 — same convention\nas ``CreateClusterRequest``.","properties":{"auth_kind":{"maxLength":64,"minLength":1,"title":"Auth Kind","type":"string"},"base_url":{"maxLength":512,"minLength":1,"title":"Base Url","type":"string"},"credentials_ref":{"maxLength":128,"minLength":1,"title":"Credentials Ref","type":"string"},"engine_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Config"},"engine_type":{"maxLength":64,"minLength":1,"title":"Engine Type","type":"string"}},"required":["engine_type","base_url","auth_kind","credentials_ref"],"title":"ConnectionTestRequest","type":"object"},"ConnectionTestResult":{"description":"Response for ``POST /api/v1/clusters/test-connection``.\n\nAlways 200 — reachable vs unreachable surfaces via ``reachable`` +\n``status`` fields. The endpoint is a diagnostic, never a mutation,\nso it never returns 503; invalid engine×auth pairings 400 BEFORE the\nnetwork call. 
(Cycle-delta F1.)","properties":{"engine_capabilities":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Capabilities"},"error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error"},"reachable":{"title":"Reachable","type":"boolean"},"status":{"enum":["green","yellow","red","unreachable"],"title":"Status","type":"string"},"version":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Version"}},"required":["reachable","status"],"title":"ConnectionTestResult","type":"object"},"ConvergenceShape":{"description":"Where the winner sits in the Optuna trial sequence + the classified regime.","properties":{"best_at_trial":{"title":"Best At Trial","type":"integer"},"regime":{"enum":["early_held","late_rising","noisy"],"title":"Regime","type":"string"},"total_trials":{"title":"Total Trials","type":"integer"}},"required":["best_at_trial","total_trials","regime"],"title":"ConvergenceShape","type":"object"},"ConversationDetail":{"description":"``GET /api/v1/conversations/{id}`` response.","properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"messages":{"items":{"$ref":"#/components/schemas/MessageWire"},"title":"Messages","type":"array"},"title":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Title"}},"required":["id","title","created_at","messages"],"title":"ConversationDetail","type":"object"},"ConversationSummary":{"description":"``GET /api/v1/conversations`` row + ``POST`` 201 body.\n\n``last_message_preview`` is the most recent user / assistant message's\n``content.text``, truncated at the repo layer to 120 chars (with ``…``\nsuffix when cut). Tool-role rows and assistant rows whose ``content.kind``\nis ``system_notice`` are skipped. ``None`` for brand-new conversations\nwith no qualifying messages — see ``chore_chat_last_message_preview``.\n\n``last_message_at`` is the ``created_at`` of that same row, or ``None``\nfor empty conversations. 
The list page uses it to render \"when did\nanyone last touch this thread\" instead of the conversation's\n``created_at``.","properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"last_message_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Last Message At"},"last_message_preview":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Last Message Preview"},"message_count":{"title":"Message Count","type":"integer"},"title":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Title"}},"required":["id","title","created_at","message_count"],"title":"ConversationSummary","type":"object"},"ConversationsListResponse":{"description":"``GET /api/v1/conversations`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/ConversationSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ConversationsListResponse","type":"object"},"CreateClusterRequest":{"description":"Request body for ``POST /api/v1/clusters``.\n\nSee module docstring for the deliberate ``str`` vs ``Literal`` split.","properties":{"auth_kind":{"maxLength":64,"minLength":1,"title":"Auth Kind","type":"string"},"base_url":{"maxLength":512,"minLength":1,"title":"Base Url","type":"string"},"credentials_ref":{"maxLength":128,"minLength":1,"title":"Credentials Ref","type":"string"},"engine_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Config"},"engine_type":{"maxLength":64,"minLength":1,"title":"Engine 
Type","type":"string"},"environment":{"enum":["prod","staging","dev"],"title":"Environment","type":"string"},"name":{"maxLength":128,"minLength":1,"pattern":"^[a-z0-9][a-z0-9-]*$","title":"Name","type":"string"},"notes":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Notes"},"target_filter":{"anyOf":[{"maxLength":256,"minLength":1,"type":"string"},{"type":"null"}],"description":"Optional glob pattern (fnmatch.fnmatchcase: *, ?, [seq], [!seq]; no brace expansion). Scopes GET /clusters/{id}/targets to matching index names. Null = no filter.","title":"Target Filter"}},"required":["name","engine_type","environment","base_url","auth_kind","credentials_ref"],"title":"CreateClusterRequest","type":"object"},"CreateConfigRepoRequest":{"description":"Body of ``POST /api/v1/config-repos`` (FR-3).\n\n``provider`` is server-derived from ``repo_url`` (cycle-2 F4 from\nspec review) — NOT in the payload. The validator enforces a strict\nGitHub URL pattern; non-GitHub URLs surface as 400\n``UNSUPPORTED_PROVIDER`` at the router layer.","properties":{"auth_ref":{"maxLength":128,"minLength":1,"pattern":"^[a-zA-Z0-9_-]+$","title":"Auth Ref","type":"string"},"default_branch":{"default":"main","maxLength":128,"minLength":1,"title":"Default Branch","type":"string"},"name":{"maxLength":128,"minLength":1,"pattern":"^[a-z0-9][a-z0-9-]*$","title":"Name","type":"string"},"pr_base_branch":{"default":"main","maxLength":128,"minLength":1,"title":"Pr Base Branch","type":"string"},"repo_url":{"maxLength":512,"minLength":1,"title":"Repo Url","type":"string"},"webhook_secret_ref":{"anyOf":[{"maxLength":128,"pattern":"^[a-zA-Z0-9_-]+$","type":"string"},{"type":"null"}],"title":"Webhook Secret Ref"}},"required":["name","repo_url","auth_ref"],"title":"CreateConfigRepoRequest","type":"object"},"CreateConversationRequest":{"description":"``POST /api/v1/conversations`` 
body.","properties":{"title":{"anyOf":[{"maxLength":200,"type":"string"},{"type":"null"}],"title":"Title"}},"title":"CreateConversationRequest","type":"object"},"CreateJudgmentListFromUbiRequest":{"description":"Body for ``POST /api/v1/judgments/generate-from-ubi`` (Story 3.2 / FR-3).\n\nMirrors :class:`backend.app.services.agent_judgments_dispatch.UbiJudgmentGenerationRequest`.\nThe ``@model_validator(mode=\"after\")`` enforces the conditional\nrequiredness of ``current_template_id`` + ``rubric`` per the hybrid\nconverter: REQUIRED when ``converter == 'hybrid_ubi_llm'`` (the LLM-\nfill path needs both); FORBIDDEN otherwise (pure UBI never calls\nthe LLM so accepting them silently would mask operator error).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"converter":{"enum":["ctr_threshold","dwell_time","hybrid_ubi_llm"],"title":"Converter","type":"string"},"converter_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Converter Config"},"current_template_id":{"anyOf":[{"maxLength":36,"minLength":36,"type":"string"},{"type":"null"}],"title":"Current Template Id"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"llm_fill_threshold":{"anyOf":[{"minimum":1.0,"type":"integer"},{"type":"null"}],"default":20,"title":"Llm Fill Threshold"},"mapping_strategy":{"default":"reject","enum":["reject","first_match","most_recent"],"title":"Mapping Strategy","type":"string"},"min_impressions_threshold":{"anyOf":[{"minimum":1.0,"type":"integer"},{"type":"null"}],"default":100,"title":"Min Impressions Threshold"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set 
Id","type":"string"},"rubric":{"anyOf":[{"minLength":1,"type":"string"},{"type":"null"}],"title":"Rubric"},"since":{"format":"date-time","title":"Since","type":"string"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"},"until":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Until"}},"required":["name","query_set_id","cluster_id","target","since","converter"],"title":"CreateJudgmentListFromUbiRequest","type":"object"},"CreateJudgmentListGenerateRequest":{"description":"Body for ``POST /api/v1/judgments/generate`` (Story 3.1).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"current_template_id":{"maxLength":36,"minLength":1,"title":"Current Template Id","type":"string"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set Id","type":"string"},"rubric":{"minLength":1,"title":"Rubric","type":"string"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}},"required":["name","query_set_id","cluster_id","target","current_template_id","rubric"],"title":"CreateJudgmentListGenerateRequest","type":"object"},"CreateProposalRequest":{"description":"Body of ``POST /api/v1/proposals`` (manual proposal creation, FR-4 / AC-6).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"config_diff":{"additionalProperties":true,"title":"Config Diff","type":"object"},"metric_delta":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metric Delta"},"template_id":{"maxLength":36,"minLength":1,"title":"Template Id","type":"string"}},"required":["cluster_id","template_id","config_diff"],"title":"CreateProposalRequest","type":"object"},"CreateQuerySetRequest":{"description":"``POST /api/v1/query-sets`` body.\n\n``cluster_id`` 
is required because Phase 1's shipped schema has\n``query_sets.cluster_id NOT NULL``. Spec FR-3 wording (``cluster_id?``)\nis documented drift tracked at\n``docs/00_overview/planned_features/chore_spec_query_set_cluster_id_drift/idea.md``.","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"}},"required":["name","cluster_id"],"title":"CreateQuerySetRequest","type":"object"},"CreateQueryTemplateRequest":{"description":"Request body for ``POST /api/v1/query-templates``.","properties":{"body":{"minLength":1,"title":"Body","type":"string"},"declared_params":{"additionalProperties":{"type":"string"},"title":"Declared Params","type":"object"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id"}},"required":["name","engine_type","body"],"title":"CreateQueryTemplateRequest","type":"object"},"CreateStudyRequest":{"description":"``POST /api/v1/studies`` body.\n\n``search_space`` is validated post-Pydantic-parse via\n:class:`backend.app.domain.study.search_space.SearchSpace` so\n:exc:`pydantic.ValidationError` produces the spec's 400\n``INVALID_SEARCH_SPACE`` (per Story 3.3 task 2).\n\nfeat_digest_executable_followups Story 4.2 — optional ``parent`` field\nrecords the parent proposal + followup-index lineage when the study\nwas spawned from a digest \"Run this followup\" action (FR-11).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"config":{"$ref":"#/components/schemas/StudyConfigSpec"},"judgment_list_id":{"maxLength":36,"minLength":1,"title":"Judgment List 
Id","type":"string"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"objective":{"$ref":"#/components/schemas/ObjectiveSpec"},"parent":{"anyOf":[{"$ref":"#/components/schemas/ParentFollowupRef"},{"type":"null"}]},"parent_study_id":{"anyOf":[{"maxLength":36,"minLength":36,"type":"string"},{"type":"null"}],"description":"feat_study_clone_from_previous FR-7 — when the operator clones an existing study via the study-detail Clone button, this carries the source study's id. Server validates existence (404 PARENT_STUDY_NOT_FOUND) and same-cluster (422 PARENT_STUDY_WRONG_CLUSTER) before persisting to studies.parent_study_id. Independent of the proposal-lineage 'parent' field (D-5); both may be set.","title":"Parent Study Id"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set Id","type":"string"},"search_space":{"additionalProperties":true,"title":"Search Space","type":"object"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"},"template_id":{"maxLength":36,"minLength":1,"title":"Template Id","type":"string"}},"required":["name","cluster_id","target","template_id","query_set_id","judgment_list_id","search_space","objective","config"],"title":"CreateStudyRequest","type":"object"},"CurvePoint":{"description":"One point on the best-so-far curve.\n\n``trial_number`` is the trial's ``optuna_trial_number`` (the canonical\n\"trial order within the study\" field — see ``auto_followup.py`` module\ndocstring for why we sort by this rather than ``started_at``).\n``best_so_far`` is the running extremum of ``primary_metric`` over all\nearlier trials, sign-corrected to the study's optimization direction.","properties":{"best_so_far":{"title":"Best So Far","type":"number"},"trial_number":{"title":"Trial Number","type":"integer"}},"required":["trial_number","best_so_far"],"title":"CurvePoint","type":"object"},"DigestResponse":{"description":"Body of ``GET /api/v1/studies/{id}/digest`` (FR-3 / 
AC-3).\n\nfeat_digest_executable_followups Story 4.1 — ``suggested_followups`` is\nnow a discriminated-union list (NarrowFollowup | WidenFollowup |\nTextFollowup), populated by the digest handler via\n``parse_followup_list(digest.suggested_followups, ...)`` so legacy or\nmalformed JSONB payloads never crash the response.","properties":{"generated_at":{"format":"date-time","title":"Generated At","type":"string"},"generated_by":{"title":"Generated By","type":"string"},"id":{"title":"Id","type":"string"},"narrative":{"title":"Narrative","type":"string"},"parameter_importance":{"additionalProperties":{"type":"number"},"title":"Parameter Importance","type":"object"},"recommended_config":{"additionalProperties":true,"title":"Recommended Config","type":"object"},"study_id":{"title":"Study Id","type":"string"},"suggested_followups":{"items":{"$ref":"#/components/schemas/FollowupItem"},"title":"Suggested Followups","type":"array"}},"required":["id","study_id","narrative","parameter_importance","recommended_config","suggested_followups","generated_by","generated_at"],"title":"DigestResponse","type":"object"},"Document":{"description":"A single document by ID — return shape of ``SearchAdapter.get_document``.\n\nMirrors :class:`ScoredHit` minus ``score`` (browsing doesn't need scoring).\n``source`` is ``None`` when the engine's index has ``_source: false`` mapping.","properties":{"doc_id":{"minLength":1,"title":"Doc Id","type":"string"},"source":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Source"}},"required":["doc_id"],"title":"Document","type":"object"},"DocumentListResponse":{"description":"``GET /api/v1/clusters/{cluster_id}/targets/{target}/documents`` response.\n\n``next_cursor`` opaque-encodes the ES ``hits[-1].sort`` array of the\nlast visible row when ``has_more`` is True (see\n``backend.app.api.v1._documents_cursor``). 
The ``X-Total-Count`` header\non the response carries the engine's ``hits.total.value``.","properties":{"data":{"items":{"$ref":"#/components/schemas/DocumentSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"DocumentListResponse","type":"object"},"DocumentSummary":{"description":"One row in the documents list (per FR-3 / FR-8).\n\n``source`` is the *truncated* preview emitted by\n``backend.app.services.documents.truncate_source_for_list``. The detail\nendpoint returns the untruncated ``Document.source``.","properties":{"doc_id":{"minLength":1,"title":"Doc Id","type":"string"},"source":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Source"}},"required":["doc_id","source"],"title":"DocumentSummary","type":"object"},"FieldSpec":{"description":"One field returned by ``get_schema``.","properties":{"analyzer":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Analyzer"},"doc_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Doc Count"},"name":{"title":"Name","type":"string"},"type":{"title":"Type","type":"string"}},"required":["name","type"],"title":"FieldSpec","type":"object"},"FloatParam":{"additionalProperties":false,"description":"Continuous float parameter.\n\n``log=True`` enables log-uniform sampling\n(Optuna's ``suggest_float(..., log=True)``); requires ``low > 
0``.","properties":{"high":{"title":"High","type":"number"},"log":{"default":false,"title":"Log","type":"boolean"},"low":{"title":"Low","type":"number"},"type":{"const":"float","title":"Type","type":"string"}},"required":["type","low","high"],"title":"FloatParam","type":"object"},"FollowupItem":{"discriminator":{"mapping":{"narrow":"#/components/schemas/NarrowFollowup","swap_template":"#/components/schemas/SwapTemplateFollowup","text":"#/components/schemas/TextFollowup","widen":"#/components/schemas/WidenFollowup"},"propertyName":"kind"},"oneOf":[{"$ref":"#/components/schemas/NarrowFollowup"},{"$ref":"#/components/schemas/WidenFollowup"},{"$ref":"#/components/schemas/TextFollowup"},{"$ref":"#/components/schemas/SwapTemplateFollowup"}]},"GenerateJudgmentsResponse":{"description":"Response of ``POST /api/v1/judgments/generate``.\n\nPer GPT-5.5 cycle 1 F5 — the endpoint registers a typed\n``response_model`` so OpenAPI introspection + contract tests can verify\nthe wire shape.","properties":{"judgment_list_id":{"title":"Judgment List Id","type":"string"},"status":{"const":"generating","title":"Status","type":"string"}},"required":["judgment_list_id","status"],"title":"GenerateJudgmentsResponse","type":"object"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"title":"Detail","type":"array"}},"title":"HTTPValidationError","type":"object"},"HeadlineShape":{"description":"Top-line metric value + N(queries) used in the CI.\n\n``metric`` uses ``str`` (not ``ObjectiveMetric``) to avoid a circular\nimport: ``schemas.py`` imports ``ConfidenceShape`` from here, so this\nmodule cannot import back from ``schemas.py``. 
The upstream value is\nalready validated by the existing ``ObjectiveMetric`` Literal at the\ncreate-study endpoint (``schemas.py:214``).","properties":{"k":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"K"},"metric":{"title":"Metric","type":"string"},"n_queries":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"N Queries"},"value":{"title":"Value","type":"number"}},"required":["metric","value","k","n_queries"],"title":"HeadlineShape","type":"object"},"HealthCheckResult":{"description":"Wire shape of the per-cluster health probe (mirrors ``HealthStatus``).","properties":{"checked_at":{"title":"Checked At","type":"string"},"error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error"},"status":{"enum":["green","yellow","red","unreachable"],"title":"Status","type":"string"},"version":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Version"}},"required":["status","checked_at"],"title":"HealthCheckResult","type":"object"},"HealthResponse":{"description":"The /healthz response body. 
Same shape for HTTP 200 and 503.","properties":{"openai_capabilities":{"$ref":"#/components/schemas/OpenAICapabilities"},"openai_endpoint":{"description":"Configured OPENAI_BASE_URL","title":"Openai Endpoint","type":"string"},"status":{"enum":["ok","degraded"],"title":"Status","type":"string"},"subsystems":{"$ref":"#/components/schemas/Subsystems"},"uptime_seconds":{"description":"Seconds since the API process started","title":"Uptime Seconds","type":"integer"},"version":{"description":"Application version (relyloop_git_sha)","title":"Version","type":"string"}},"required":["status","subsystems","openai_endpoint","openai_capabilities","version","uptime_seconds"],"title":"HealthResponse","type":"object"},"ImportJudgmentItem":{"description":"One row in :class:`ImportJudgmentListRequest`.","properties":{"doc_id":{"maxLength":512,"minLength":1,"title":"Doc Id","type":"string"},"notes":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Notes"},"query_id":{"maxLength":36,"minLength":1,"title":"Query Id","type":"string"},"rating":{"enum":[0,1,2,3],"title":"Rating","type":"integer"}},"required":["query_id","doc_id","rating"],"title":"ImportJudgmentItem","type":"object"},"ImportJudgmentListRequest":{"description":"Body for ``POST /api/v1/judgment-lists/import`` (Story 3.2).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"judgments":{"items":{"$ref":"#/components/schemas/ImportJudgmentItem"},"maxItems":100000,"minItems":1,"title":"Judgments","type":"array"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set 
Id","type":"string"},"rubric":{"minLength":1,"title":"Rubric","type":"string"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}},"required":["name","query_set_id","cluster_id","target","rubric","judgments"],"title":"ImportJudgmentListRequest","type":"object"},"IntParam":{"additionalProperties":false,"description":"Integer parameter inclusive of both bounds.","properties":{"high":{"title":"High","type":"integer"},"low":{"title":"Low","type":"integer"},"type":{"const":"int","title":"Type","type":"string"}},"required":["type","low","high"],"title":"IntParam","type":"object"},"JudgmentListDetail":{"description":"``GET /api/v1/judgment-lists/{id}`` response.\n\nNote: ``generation_params`` is populated for UBI lists (feat_ubi_judgments\nStory 1.1's JSONB column) and NULL for LLM lists. The Story 4.3 UI\n(```` + ````) reads the\npayload to discriminate UBI/hybrid lists and to reconstruct the\noriginal request for the ambiguous-skip \"Re-run with most_recent\"\naffordance.","properties":{"calibration":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Calibration"},"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"current_template_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Current Template Id"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"generation_params":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Generation Params"},"id":{"title":"Id","type":"string"},"judgment_count":{"title":"Judgment Count","type":"integer"},"name":{"title":"Name","type":"string"},"query_set_id":{"title":"Query Set 
Id","type":"string"},"rubric":{"title":"Rubric","type":"string"},"source_breakdown":{"$ref":"#/components/schemas/_SourceBreakdown"},"status":{"enum":["generating","complete","failed"],"title":"Status","type":"string"},"target":{"title":"Target","type":"string"}},"required":["id","name","description","query_set_id","cluster_id","target","current_template_id","rubric","status","failed_reason","judgment_count","source_breakdown","calibration","generation_params","created_at"],"title":"JudgmentListDetail","type":"object"},"JudgmentListJudgmentsResponse":{"description":"``GET /api/v1/judgment-lists/{id}/judgments`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/JudgmentRow"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"JudgmentListJudgmentsResponse","type":"object"},"JudgmentListListResponse":{"description":"``GET /api/v1/judgment-lists`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/JudgmentListSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"JudgmentListListResponse","type":"object"},"JudgmentListRef":{"description":"One entry in the ``QUERY_HAS_JUDGMENTS`` 409 envelope.\n\nLives in ``detail.judgment_lists``. 
Maps from the repo-layer\n:class:`backend.app.db.repo.judgment.JudgmentListRefRow` at the\nrouter boundary.","properties":{"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"}},"required":["id","name"],"title":"JudgmentListRef","type":"object"},"JudgmentListSummary":{"description":"List-view row on ``GET /api/v1/judgment-lists``.","properties":{"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"query_set_id":{"title":"Query Set Id","type":"string"},"status":{"enum":["generating","complete","failed"],"title":"Status","type":"string"},"target":{"title":"Target","type":"string"}},"required":["id","name","description","query_set_id","cluster_id","target","status","created_at"],"title":"JudgmentListSummary","type":"object"},"JudgmentRow":{"description":"``GET /api/v1/judgment-lists/{id}/judgments`` row + PATCH response.","properties":{"confidence":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Confidence"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"doc_id":{"title":"Doc Id","type":"string"},"id":{"title":"Id","type":"string"},"judgment_list_id":{"title":"Judgment List Id","type":"string"},"notes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Notes"},"query_id":{"title":"Query Id","type":"string"},"rater_ref":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rater Ref"},"rating":{"enum":[0,1,2,3],"title":"Rating","type":"integer"},"source":{"enum":["llm","human","click"],"title":"Source","type":"string"}},"required":["id","judgment_list_id","query_id","doc_id","rating","source","rater_ref","confidence","notes","created_at"],"title":"JudgmentRow","type":"object"},"LateTrialStddevShape":{"description":"Sample stddev of ``primary_metric`` over the late-trial 
window.","properties":{"min_window_required":{"title":"Min Window Required","type":"integer"},"value":{"title":"Value","type":"number"},"window_size":{"title":"Window Size","type":"integer"}},"required":["value","window_size","min_window_required"],"title":"LateTrialStddevShape","type":"object"},"MessageWire":{"description":"One row of ``GET /api/v1/conversations/{id}.messages``.","properties":{"content":{"additionalProperties":true,"title":"Content","type":"object"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"role":{"enum":["user","assistant","tool"],"title":"Role","type":"string"},"tool_calls":{"anyOf":[{"items":{"additionalProperties":true,"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"required":["id","role","content","created_at"],"title":"MessageWire","type":"object"},"NarrowFollowup":{"additionalProperties":false,"description":"A 'narrow' followup — re-run with a tighter range than the parent.","properties":{"kind":{"const":"narrow","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"$ref":"#/components/schemas/SearchSpace"}},"required":["kind","rationale","search_space"],"title":"NarrowFollowup","type":"object"},"ObjectiveSpec":{"description":"Wire shape of ``studies.objective`` (write-side validated at create).\n\n``k`` is required for ``ndcg`` / ``precision`` / ``recall`` (per\nstandard IR-evaluation conventions: those metrics are computed at a\ncutoff rank). ``map`` accepts ``k`` optionally; ``mrr`` / ``err`` ignore\nit. 
The model_validator enforces this so a malformed objective\nsurfaces as 400 ``INVALID_SEARCH_SPACE`` / 422 ``VALIDATION_ERROR``\nat study-create time rather than failing later inside ``run_trial``\nwhen the worker computes the metric.","properties":{"direction":{"default":"maximize","enum":["maximize","minimize"],"title":"Direction","type":"string"},"k":{"anyOf":[{"enum":[1,3,5,10,20,50,100],"type":"integer"},{"type":"null"}],"title":"K"},"metric":{"enum":["ndcg","map","precision","recall","mrr"],"title":"Metric","type":"string"}},"required":["metric"],"title":"ObjectiveSpec","type":"object"},"OpenAICapabilities":{"description":"Cached results of the OpenAI capability check (Story 3.3 populates Redis).\n\nStep 1 (``models_endpoint``) is reported first because it gates the rest:\nwhen it fails, the other three are reported as ``\"untested\"``. The\n``models_endpoint_status_code`` field is required-but-nullable\n(per ``bug_openai_capability_check_incapable_on_valid_key`` spec §19 D-3/D-8)\n— always present in the JSON, ``null`` when not applicable. This lets\noperators distinguish ``401 -> bad key``, ``429 -> quota``,\n``5xx -> upstream outage``, ``null -> network unreachable / cache miss``.","properties":{"chat":{"description":"Chat completion probe result","enum":["ok","fail","untested"],"title":"Chat","type":"string"},"function_calling":{"description":"Function-calling probe result (tool_choice=required)","enum":["ok","fail","untested"],"title":"Function Calling","type":"string"},"models_endpoint":{"description":"GET /models probe outcome. 'ok' / 'fail' are projected from CapabilityResult.models_endpoint; 'untested' is the cache-miss default, matching the existing chat / function_calling / structured_output cache-miss handling.","enum":["ok","fail","untested"],"title":"Models Endpoint","type":"string"},"models_endpoint_status_code":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"HTTP status code from the GET /models probe when it HTTP-failed (>= 400). 
null for the success path, network-class failure (timeout / DNS / connection-refused), or cache miss. Required-but-nullable: the JSON key is always present with explicit null when no value, never omitted.","title":"Models Endpoint Status Code"},"structured_output":{"description":"JSON-schema response_format probe result","enum":["ok","fail","untested"],"title":"Structured Output","type":"string"}},"required":["models_endpoint","models_endpoint_status_code","chat","function_calling","structured_output"],"title":"OpenAICapabilities","type":"object"},"OpenPrResponse":{"description":"Body of ``POST /api/v1/proposals/{id}/open_pr`` (FR-1).\n\nReturned with HTTP 202 on successful enqueue. Status is always\n``'pending'`` at enqueue time; the worker flips it to ``'pr_opened'``\nafter the PR is open.","properties":{"message":{"title":"Message","type":"string"},"proposal_id":{"title":"Proposal Id","type":"string"},"status":{"const":"pending","title":"Status","type":"string"}},"required":["proposal_id","status","message"],"title":"OpenPrResponse","type":"object"},"OverrideJudgmentRequest":{"description":"Body for ``PATCH /api/v1/judgment-lists/{id}/judgments/{judgment_id}``.\n\n``rating`` is INTENTIONALLY unbounded at the Pydantic layer — spec §8.5\nrequires out-of-range failures to surface as 400 ``INVALID_RATING`` (not\nPydantic's default 422 ``VALIDATION_ERROR``). 
The handler validates the\nvalue manually and raises the domain code (per GPT-5.5 cycle 1 F4).","properties":{"notes":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Notes"},"rating":{"title":"Rating","type":"integer"}},"required":["rating"],"title":"OverrideJudgmentRequest","type":"object"},"ParentFollowupRef":{"description":"Optional lineage payload on ``POST /api/v1/studies``.\n\nfeat_digest_executable_followups FR-11 — when the operator clicks\n\"Run this followup\" on a proposal's digest card, the create-study\npayload carries the parent proposal's id + the 0-based index into\nthe digest's ``suggested_followups`` array so the spawned study\nremembers where it came from.\n\n``proposal_id`` is a UUIDv7 (36-char hex). The exact-length bound\nforces malformed strings to surface as 422 ``VALIDATION_ERROR``\nrather than reach the DB FK check and emerge as a 404\n``PROPOSAL_NOT_FOUND``.","properties":{"followup_index":{"minimum":0.0,"title":"Followup Index","type":"integer"},"proposal_id":{"maxLength":36,"minLength":36,"title":"Proposal Id","type":"string"}},"required":["proposal_id","followup_index"],"title":"ParentFollowupRef","type":"object"},"PerQueryOutcomesShape":{"description":"Per-query outcome counts + the top-5 named regressors and improvers.","properties":{"comparison_against":{"enum":["runner_up","baseline"],"title":"Comparison Against","type":"string"},"improved":{"title":"Improved","type":"integer"},"regressed":{"title":"Regressed","type":"integer"},"top_improvers":{"default":[],"items":{"$ref":"#/components/schemas/RegressorRowShape"},"title":"Top Improvers","type":"array"},"top_regressors":{"items":{"$ref":"#/components/schemas/RegressorRowShape"},"title":"Top Regressors","type":"array"},"unchanged":{"title":"Unchanged","type":"integer"}},"required":["improved","unchanged","regressed","comparison_against","top_regressors"],"title":"PerQueryOutcomesShape","type":"object"},"ProposalDetail":{"description":"Body of the proposal 
detail endpoints.\n\nUsed by ``GET /api/v1/proposals/{id}``, ``POST /api/v1/proposals``,\nand ``POST /api/v1/proposals/{id}/reject``.","properties":{"cluster":{"$ref":"#/components/schemas/_ClusterEmbed"},"config_diff":{"additionalProperties":true,"title":"Config Diff","type":"object"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"digest":{"anyOf":[{"$ref":"#/components/schemas/_DigestEmbed"},{"type":"null"}]},"id":{"title":"Id","type":"string"},"is_currently_live":{"default":false,"title":"Is Currently Live","type":"boolean"},"metric_delta":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metric Delta"},"pr_merged_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Pr Merged At"},"pr_open_error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pr Open Error"},"pr_state":{"anyOf":[{"enum":["open","closed","merged"],"type":"string"},{"type":"null"}],"title":"Pr State"},"pr_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pr Url"},"rejected_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rejected Reason"},"status":{"enum":["pending","pr_opened","pr_merged","rejected"],"title":"Status","type":"string"},"study_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Study Id"},"study_summary":{"anyOf":[{"$ref":"#/components/schemas/_StudySummary"},{"type":"null"}]},"study_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Study Trial Id"},"template":{"$ref":"#/components/schemas/_TemplateEmbed"}},"required":["id","study_id","study_summary","study_trial_id","cluster","template","config_diff","metric_delta","status","pr_url","pr_state","pr_merged_at","pr_open_error","rejected_reason","digest","created_at"],"title":"ProposalDetail","type":"object"},"ProposalSummary":{"description":"Row in the ``GET /api/v1/proposals`` list 
response.","properties":{"cluster":{"$ref":"#/components/schemas/_ClusterEmbed"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"is_currently_live":{"default":false,"title":"Is Currently Live","type":"boolean"},"metric_delta":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metric Delta"},"pr_state":{"anyOf":[{"enum":["open","closed","merged"],"type":"string"},{"type":"null"}],"title":"Pr State"},"pr_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pr Url"},"status":{"enum":["pending","pr_opened","pr_merged","rejected"],"title":"Status","type":"string"},"study_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Study Id"},"template":{"$ref":"#/components/schemas/_TemplateEmbed"}},"required":["id","study_id","cluster","template","status","pr_state","pr_url","metric_delta","created_at"],"title":"ProposalSummary","type":"object"},"ProposalsListResponse":{"description":"Body of ``GET /api/v1/proposals``.","properties":{"data":{"items":{"$ref":"#/components/schemas/ProposalSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ProposalsListResponse","type":"object"},"QueryHasJudgmentsDetail":{"description":"The ``detail`` object of a 409 ``QUERY_HAS_JUDGMENTS`` response.\n\nExtends the canonical ``{error_code, message, retryable}`` envelope\nwith two structured fields the frontend consumes directly\n(``judgment_lists`` + ``overflow_count``). 
Wired into the FastAPI\nroute's ``responses={409: {\"model\": QueryHasJudgmentsEnvelope}}`` so\nthe OpenAPI schema documents the contract.","properties":{"error_code":{"const":"QUERY_HAS_JUDGMENTS","title":"Error Code","type":"string"},"judgment_lists":{"items":{"$ref":"#/components/schemas/JudgmentListRef"},"title":"Judgment Lists","type":"array"},"message":{"title":"Message","type":"string"},"overflow_count":{"title":"Overflow Count","type":"integer"},"retryable":{"const":false,"title":"Retryable","type":"boolean"}},"required":["error_code","message","retryable","judgment_lists","overflow_count"],"title":"QueryHasJudgmentsDetail","type":"object"},"QueryHasJudgmentsEnvelope":{"description":"Top-level 409 wrapper (FastAPI nests under ``detail`` for HTTPException).","properties":{"detail":{"$ref":"#/components/schemas/QueryHasJudgmentsDetail"}},"required":["detail"],"title":"QueryHasJudgmentsEnvelope","type":"object"},"QueryListResponse":{"description":"``GET /api/v1/query-sets/{set_id}/queries`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/QueryRow"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"QueryListResponse","type":"object"},"QueryRow":{"description":"Wire row returned by the per-query GET + PATCH endpoints.\n\nUsed by both ``GET /api/v1/query-sets/{set_id}/queries`` and\n``PATCH /api/v1/query-sets/{set_id}/queries/{query_id}``.\n``judgment_count`` is a derived field — single batched GROUP BY in the\nrouter via :func:`backend.app.db.repo.judgment.count_judgments_per_query`.","properties":{"id":{"title":"Id","type":"string"},"judgment_count":{"title":"Judgment Count","type":"integer"},"query_metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Query Metadata"},"query_text":{"title":"Query 
Text","type":"string"},"reference_answer":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reference Answer"}},"required":["id","query_text","reference_answer","query_metadata","judgment_count"],"title":"QueryRow","type":"object"},"QuerySetDetail":{"description":"``GET /api/v1/query-sets/{id}`` response.","properties":{"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"query_count":{"title":"Query Count","type":"integer"}},"required":["id","name","description","cluster_id","query_count","created_at"],"title":"QuerySetDetail","type":"object"},"QuerySetListResponse":{"description":"``GET /api/v1/query-sets`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/QuerySetSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"QuerySetListResponse","type":"object"},"QuerySetSummary":{"description":"List-view shape.\n\n``query_count`` is the number of queries in the set. It is resolved\nvia a single batched ``GROUP BY query_set_id`` aggregate per page\n(``repo.count_queries_for_sets``), NOT a per-row count — so the\nlist endpoint stays at a fixed 2 queries (the page + the count\naggregate) regardless of page size. 
This is the same no-N+1 pattern\n``feat_studies_convergence_visibility`` (PR #421) used for the\nstudies-list ``trial_count`` field.","properties":{"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"query_count":{"title":"Query Count","type":"integer"}},"required":["id","name","cluster_id","query_count","created_at"],"title":"QuerySetSummary","type":"object"},"QueryTemplateDetail":{"description":"``GET /api/v1/query-templates/{id}`` response.","properties":{"body":{"title":"Body","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"declared_params":{"additionalProperties":{"type":"string"},"title":"Declared Params","type":"object"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id"},"version":{"title":"Version","type":"integer"}},"required":["id","name","engine_type","body","declared_params","version","parent_id","created_at"],"title":"QueryTemplateDetail","type":"object"},"QueryTemplateListResponse":{"description":"``GET /api/v1/query-templates`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/QueryTemplateSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"QueryTemplateListResponse","type":"object"},"QueryTemplateSummary":{"description":"List-view shape; drops ``body`` + the full ``declared_params`` dict.\n\nSurfaces ``param_count`` (= ``len(declared_params)``) so the\ntemplates list can show each template's tuning surface at a glance.\n``param_count`` is free to compute — 
``declared_params`` is a JSONB\ncolumn already loaded on the row (not a child relationship), so the\ncount is ``len(row.declared_params)`` with no extra query and no\nN+1 risk. The full dict remains on ``QueryTemplateDetail``.","properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"param_count":{"title":"Param Count","type":"integer"},"version":{"title":"Version","type":"integer"}},"required":["id","name","engine_type","version","param_count","created_at"],"title":"QueryTemplateSummary","type":"object"},"RegressorRowShape":{"description":"One row in the named-regressors or named-improvers table.\n\nUsed for BOTH the ``top_regressors`` and ``top_improvers`` lists.\nThe wire shape is identical — ``delta = winner_score - comparison_score``\nis negative on the regressor list, positive on the improver list. 
The\nclass name is historical (regressors shipped first); reusing the same\ntype keeps the schema and the per-row renderer compact.","properties":{"comparison_score":{"title":"Comparison Score","type":"number"},"delta":{"title":"Delta","type":"number"},"query_id":{"title":"Query Id","type":"string"},"query_text":{"title":"Query Text","type":"string"},"winner_score":{"title":"Winner Score","type":"number"}},"required":["query_id","query_text","winner_score","comparison_score","delta"],"title":"RegressorRowShape","type":"object"},"RejectProposalRequest":{"description":"Body of ``POST /api/v1/proposals/{id}/reject`` (FR-4 / AC-5).","properties":{"reason":{"anyOf":[{"maxLength":500,"type":"string"},{"type":"null"}],"title":"Reason"}},"title":"RejectProposalRequest","type":"object"},"ReseedStatusResponse":{"additionalProperties":false,"description":"Polling-endpoint response for ``GET /api/v1/_test/demo/reseed/status``.\n\nPer ``bug_demo_reseed_fake_metric_regression`` D-2. Lives in Redis as a\nsingle JSON blob keyed by :data:`DEMO_RESEED_STATUS_KEY` so the\nhandler reads it in one round-trip.","properties":{"current_step":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Current Step"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"scenarios_completed":{"default":0,"title":"Scenarios Completed","type":"integer"},"scenarios_skipped":{"items":{"type":"string"},"title":"Scenarios Skipped","type":"array"},"scenarios_total":{"default":0,"title":"Scenarios Total","type":"integer"},"started_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Started 
At"},"status":{"enum":["idle","running","complete","failed"],"title":"Status","type":"string"},"steps":{"items":{"type":"string"},"title":"Steps","type":"array"},"summary":{"anyOf":[{"$ref":"#/components/schemas/ReseedSummary"},{"type":"null"}]}},"required":["status"],"title":"ReseedStatusResponse","type":"object"},"ReseedSummary":{"additionalProperties":false,"description":"Returned by :func:`reseed_demo_state` on success.\n\nPer spec §9 Required invariants, every counter is exactly 4 on the\nhappy path; ``duration_ms`` is wall-clock from orchestration start\nto the rename commit.","properties":{"clusters_created":{"title":"Clusters Created","type":"integer"},"duration_ms":{"title":"Duration Ms","type":"integer"},"proposals_created":{"title":"Proposals Created","type":"integer"},"query_sets_created":{"title":"Query Sets Created","type":"integer"},"studies_completed":{"title":"Studies Completed","type":"integer"}},"required":["clusters_created","query_sets_created","studies_completed","proposals_created","duration_ms"],"title":"ReseedSummary","type":"object"},"RunQueryHit":{"description":"One hit in the ``run_query`` response.","properties":{"doc_id":{"title":"Doc Id","type":"string"},"score":{"title":"Score","type":"number"},"source":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Source"}},"required":["doc_id","score"],"title":"RunQueryHit","type":"object"},"RunQueryRequest":{"description":"``POST /api/v1/clusters/{id}/run_query`` body.","properties":{"query_dsl":{"additionalProperties":true,"title":"Query Dsl","type":"object"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"},"top_k":{"default":10,"maximum":1000.0,"minimum":1.0,"title":"Top K","type":"integer"}},"required":["target","query_dsl"],"title":"RunQueryRequest","type":"object"},"RunQueryResponse":{"description":"``POST /api/v1/clusters/{id}/run_query`` 
response.","properties":{"hits":{"items":{"$ref":"#/components/schemas/RunQueryHit"},"title":"Hits","type":"array"}},"required":["hits"],"title":"RunQueryResponse","type":"object"},"RunnerUpGapShape":{"description":"Runner-up trial's metric vs the winner.\n\nThe whole shape is suppressed to ``None`` when there are <2 complete\ntrials (FR-2 + FR-7); ``classification`` is non-null whenever this shape\nis present.","properties":{"classification":{"enum":["robust_plateau","sharp_peak"],"title":"Classification","type":"string"},"runner_up_metric":{"title":"Runner Up Metric","type":"number"},"top10_within":{"title":"Top10 Within","type":"number"},"value":{"title":"Value","type":"number"}},"required":["value","classification","top10_within","runner_up_metric"],"title":"RunnerUpGapShape","type":"object"},"Schema":{"description":"An index / collection's field schema.","properties":{"fields":{"items":{"$ref":"#/components/schemas/FieldSpec"},"title":"Fields","type":"array"},"name":{"title":"Name","type":"string"}},"required":["name","fields"],"title":"Schema","type":"object"},"SearchSpace":{"additionalProperties":false,"description":"Pydantic model for the ``studies.search_space`` JSONB column.\n\nWire format::\n\n {\n \"params\": {\n \"boost_title\": {\"type\": \"float\", \"low\": 0.1, \"high\": 10.0, \"log\": true},\n \"min_should_match\": {\"type\": \"int\", \"low\": 1, \"high\": 5},\n \"operator\": {\"type\": \"categorical\", \"choices\": [\"and\", \"or\"]},\n }\n 
}","properties":{"params":{"additionalProperties":{"discriminator":{"mapping":{"categorical":"#/components/schemas/CategoricalParam","float":"#/components/schemas/FloatParam","int":"#/components/schemas/IntParam"},"propertyName":"type"},"oneOf":[{"$ref":"#/components/schemas/FloatParam"},{"$ref":"#/components/schemas/IntParam"},{"$ref":"#/components/schemas/CategoricalParam"}]},"minProperties":1,"title":"Params","type":"object"}},"required":["params"],"title":"SearchSpace","type":"object"},"SeedAutoFollowupChainRequest":{"additionalProperties":false,"description":"Payload for ``POST /api/v1/_test/auto-followup/seed-chain``.\n\nSeeds ``depth + 1`` linked studies (root → … → leaf) so E2E tests can\ncover the chain-panel parent-link / children-table / cascade-radio paths\nthat the public ``POST /api/v1/studies`` endpoint can't drive\n(``parent_study_id`` is set only by the auto-followup worker).\n\nCloses ``chore_auto_followup_e2e_chain_seed_helper`` (idea #2).","properties":{"cluster_id":{"minLength":1,"title":"Cluster Id","type":"string"},"depth":{"description":"Number of chain hops to seed. depth=1 → root + leaf (2 nodes). depth=2 → root + 1 middle + leaf (3 nodes).","maximum":5.0,"minimum":1.0,"title":"Depth","type":"integer"},"in_flight_leaf":{"default":true,"description":"When True (default), the deepest node is left at status='queued'. When False, it's driven to 'completed' too. Default True matches the primary E2E use case: cascade-radio coverage where the middle node needs an in-flight child.","title":"In Flight Leaf","type":"boolean"},"in_flight_middle":{"default":true,"description":"When True (default), the immediate parent of the leaf is left at status='queued' so the Cancel button is enabled (canCancel = running || queued per study-action-bar.tsx:46). Required for the cancel-modal cascade-radio test. 
When False, all intermediates are completed (more realistic chain state but cancel modal won't open on the middle).","title":"In Flight Middle","type":"boolean"},"judgment_list_id":{"minLength":1,"title":"Judgment List Id","type":"string"},"query_set_id":{"minLength":1,"title":"Query Set Id","type":"string"},"template_id":{"minLength":1,"title":"Template Id","type":"string"}},"required":["cluster_id","query_set_id","template_id","judgment_list_id","depth"],"title":"SeedAutoFollowupChainRequest","type":"object"},"SeedAutoFollowupChainResponse":{"description":"IDs of every node in the seeded chain, in parent→child order.","properties":{"leaf_id":{"title":"Leaf Id","type":"string"},"middle_ids":{"items":{"type":"string"},"title":"Middle Ids","type":"array"},"root_id":{"title":"Root Id","type":"string"}},"required":["root_id","middle_ids","leaf_id"],"title":"SeedAutoFollowupChainResponse","type":"object"},"SeedCompletedStudyRequest":{"additionalProperties":false,"description":"Payload for ``POST /api/v1/_test/studies/seed-completed``.\n\nAll four FK fields are required; the caller is responsible for\nseeding the parent rows first (typically via the public\n``seedFullChain`` E2E helper).","properties":{"cluster_id":{"minLength":1,"title":"Cluster Id","type":"string"},"extra_trial_metrics":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"description":"Optional list of additional complete-trial `primary_metric` values (numbered from 2 upward) seeded on top of the default winner (0.487) + runner-up (0.412). Used to push the study past the convergence classifier's usable-trial floor (5) so the `` renders a real verdict + curve instead of the too_few_trials null state (feat_study_convergence_indicator). Every value MUST be < 0.487 so the winner / best_metric / proposal / digest stay anchored to the unchanged 0.412 -> 0.487 story. 
Omit for the default 2-trial shape.","title":"Extra Trial Metrics"},"judgment_list_id":{"minLength":1,"title":"Judgment List Id","type":"string"},"query_set_id":{"minLength":1,"title":"Query Set Id","type":"string"},"runner_up_per_query":{"anyOf":[{"additionalProperties":{"additionalProperties":true,"type":"object"},"type":"object"},{"type":"null"}],"description":"Optional per-query metrics for the runner-up trial; pairs with `winner_per_query`.","title":"Runner Up Per Query"},"suggested_followups":{"anyOf":[{"items":{"additionalProperties":true,"type":"object"},"type":"array"},{"type":"null"}],"description":"feat_digest_executable_followups Story 6.1 — optional structured FollowupItem list (`[{kind, rationale, search_space}]`) to seed on the digest. When omitted, the seeder writes two default text-kind items. The E2E Run-followup spec passes a `narrow` item so it can drive the per-card Run button + modal prefill flow.","title":"Suggested Followups"},"template_id":{"minLength":1,"title":"Template Id","type":"string"},"winner_per_query":{"anyOf":[{"additionalProperties":{"additionalProperties":true,"type":"object"},"type":"object"},{"type":"null"}],"description":"Optional per-query metrics dict to populate on the winner trial. Shape: `{query_id: {metric_token: float}}` where metric_token matches what `scoring.score()` emits (e.g. `ndcg@10`). Set alongside `runner_up_per_query` to drive the ConfidencePanel happy path on `/studies/[id]`. When omitted, the seeded trials have `per_query_metrics IS NULL` (the pre-feat_pr_metric_confidence shape).","title":"Winner Per Query"},"with_pending_proposal":{"default":true,"description":"When true (default), also insert a `status='pending'` proposal linked to the study so the digest panel's Open PR button renders enabled. 
Set false to test the AC-11 aria-disabled-button + tooltip path.","title":"With Pending Proposal","type":"boolean"}},"required":["cluster_id","query_set_id","template_id","judgment_list_id"],"title":"SeedCompletedStudyRequest","type":"object"},"SeedCompletedStudyResponse":{"description":"IDs of the inserted rows; mirrors :class:`SeededStudyTriple`.","properties":{"digest_id":{"title":"Digest Id","type":"string"},"proposal_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Proposal Id"},"study_id":{"title":"Study Id","type":"string"}},"required":["study_id","digest_id","proposal_id"],"title":"SeedCompletedStudyResponse","type":"object"},"SendMessageRequest":{"description":"``POST /api/v1/conversations/{id}/messages`` body (Story 3.2).","properties":{"content":{"$ref":"#/components/schemas/SendMessageRequestContent"},"role":{"const":"user","default":"user","title":"Role","type":"string"}},"required":["content"],"title":"SendMessageRequest","type":"object"},"SendMessageRequestContent":{"description":"Sub-shape inside :class:`SendMessageRequest`.","properties":{"text":{"maxLength":20000,"minLength":1,"title":"Text","type":"string"}},"required":["text"],"title":"SendMessageRequestContent","type":"object"},"StudyChainLink":{"description":"One link in the rolled-up overnight-chain summary (feat_overnight_autopilot §8.3).","properties":{"auto_followup_depth_remaining":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Auto Followup Depth Remaining"},"baseline_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Baseline Metric"},"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"completed_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Completed At"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"delta_from_prev":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Delta From 
Prev"},"direction":{"enum":["maximize","minimize"],"title":"Direction","type":"string"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"proposal_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Proposal Id"},"status":{"enum":["queued","running","completed","cancelled","failed"],"title":"Status","type":"string"}},"required":["id","name","status","best_metric","baseline_metric","direction","delta_from_prev","proposal_id","auto_followup_depth_remaining","failed_reason","created_at","completed_at"],"title":"StudyChainLink","type":"object"},"StudyChainResponse":{"description":"``GET /api/v1/studies/{id}/chain`` response (feat_overnight_autopilot §8.3).","properties":{"anchor_study_id":{"title":"Anchor Study Id","type":"string"},"best_link_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Best Link Id"},"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"cumulative_lift":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Cumulative Lift"},"direction":{"enum":["maximize","minimize"],"title":"Direction","type":"string"},"links":{"items":{"$ref":"#/components/schemas/StudyChainLink"},"title":"Links","type":"array"},"proposal_id_for_best_link":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Proposal Id For Best Link"},"stop_reason":{"enum":["depth_exhausted","no_lift","budget","parent_failed","cancelled","in_flight"],"title":"Stop Reason","type":"string"}},"required":["anchor_study_id","best_link_id","best_metric","cumulative_lift","direction","stop_reason","proposal_id_for_best_link","links"],"title":"StudyChainResponse","type":"object"},"StudyConfigSpec":{"description":"Wire shape of ``studies.config`` (write-side).\n\nThe model_validator below enforces that at least one stop condition is\nset — otherwise the study has no terminating condition (FR-4).\n``parallelism`` / ``trial_timeout_s`` are optional; 
when absent the\nworker reads ``Settings.studies_default_parallelism`` /\n``studies_default_timeout_s`` at job time. The API layer does NOT\nmaterialize these fields into the stored row — see Story 1.5 +\nStory 3.3's ``config.model_dump(exclude_none=True, exclude_unset=True)``\ncontract.","properties":{"auto_followup_depth":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Auto Followup Depth"},"baseline_params":{"anyOf":[{"additionalProperties":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"},{"type":"boolean"},{"type":"null"}]},"type":"object"},{"type":"null"}],"title":"Baseline Params"},"max_trials":{"anyOf":[{"maximum":100000.0,"minimum":1.0,"type":"integer"},{"type":"null"}],"title":"Max Trials"},"parallelism":{"anyOf":[{"maximum":64.0,"minimum":1.0,"type":"integer"},{"type":"null"}],"title":"Parallelism"},"pruner":{"anyOf":[{"enum":["median","none"],"type":"string"},{"type":"null"}],"title":"Pruner"},"sampler":{"anyOf":[{"enum":["tpe","random"],"type":"string"},{"type":"null"}],"title":"Sampler"},"secondary_metrics":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Secondary Metrics"},"seed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Seed"},"time_budget_min":{"anyOf":[{"exclusiveMinimum":0.0,"type":"number"},{"type":"null"}],"title":"Time Budget Min"},"trial_timeout_s":{"anyOf":[{"maximum":3600.0,"minimum":5.0,"type":"integer"},{"type":"null"}],"title":"Trial Timeout S"}},"title":"StudyConfigSpec","type":"object"},"StudyConvergenceShape":{"description":"Verdict + supporting numerics for the UI panel and the digest narrative.\n\nMirrors the ``ConfidenceShape`` pattern from ``confidence.py``: the\ndomain module owns the Pydantic model, and ``backend.app.api.v1.schemas``\nre-exports it for the ``StudyDetail.convergence`` field. 
The\n``best_so_far_curve`` is the chart's data series; ``verdict`` is the\nbadge label.\n\n**Name discipline (plan §0).** The bare class name ``ConvergenceShape``\nis already taken by :class:`backend.app.domain.study.confidence.ConvergenceShape`\n(a different concept — winner-trial *timing*, not metric plateau).\n``StudyConvergenceShape`` is the study-level analogue; the confidence\nsub-shape stays on its inner module. The two coexist on ``StudyDetail``\n(``confidence.convergence`` is the inner one; ``convergence`` is this\none), and FastAPI emits both under their bare class names in the\nOpenAPI schema — no fully-qualified disambiguation noise leaks to the\nfrontend.","properties":{"best_so_far_curve":{"items":{"$ref":"#/components/schemas/CurvePoint"},"title":"Best So Far Curve","type":"array"},"direction":{"enum":["maximize","minimize"],"title":"Direction","type":"string"},"epsilon":{"title":"Epsilon","type":"number"},"improvement_in_window":{"title":"Improvement In Window","type":"number"},"total_complete_trials":{"title":"Total Complete Trials","type":"integer"},"verdict":{"enum":["converged","still_improving","too_few_trials"],"title":"Verdict","type":"string"},"warmup_floor":{"title":"Warmup Floor","type":"integer"},"window_size":{"title":"Window Size","type":"integer"}},"required":["verdict","direction","window_size","epsilon","warmup_floor","total_complete_trials","improvement_in_window","best_so_far_curve"],"title":"StudyConvergenceShape","type":"object"},"StudyDetail":{"description":"``GET /api/v1/studies/{id}`` response + ``POST/cancel`` response.","properties":{"baseline_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Baseline Metric"},"baseline_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Baseline Trial Id"},"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"best_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Best Trial Id"},"cluster_id":{"title":"Cluster 
Id","type":"string"},"completed_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Completed At"},"confidence":{"anyOf":[{"$ref":"#/components/schemas/ConfidenceShape"},{"type":"null"}]},"config":{"additionalProperties":true,"title":"Config","type":"object"},"convergence":{"anyOf":[{"$ref":"#/components/schemas/StudyConvergenceShape"},{"type":"null"}]},"created_at":{"format":"date-time","title":"Created At","type":"string"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"id":{"title":"Id","type":"string"},"judgment_list_id":{"title":"Judgment List Id","type":"string"},"name":{"title":"Name","type":"string"},"objective":{"additionalProperties":true,"title":"Objective","type":"object"},"optuna_study_name":{"title":"Optuna Study Name","type":"string"},"parent_study_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Study Id"},"query_set_id":{"title":"Query Set Id","type":"string"},"search_space":{"additionalProperties":true,"title":"Search Space","type":"object"},"started_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Started At"},"status":{"enum":["queued","running","completed","cancelled","failed"],"title":"Status","type":"string"},"target":{"title":"Target","type":"string"},"template_id":{"title":"Template Id","type":"string"},"trials_summary":{"$ref":"#/components/schemas/TrialsSummaryShape"}},"required":["id","name","cluster_id","target","template_id","query_set_id","judgment_list_id","search_space","objective","config","status","failed_reason","optuna_study_name","parent_study_id","baseline_metric","baseline_trial_id","best_metric","best_trial_id","created_at","started_at","completed_at","trials_summary"],"title":"StudyDetail","type":"object"},"StudyListResponse":{"description":"``GET /api/v1/studies`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/StudySummary"},"title":"Data","type":"array"},"has_more":{"title":"Has 
More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"StudyListResponse","type":"object"},"StudySummary":{"description":"List-view shape.","properties":{"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"cluster_id":{"title":"Cluster Id","type":"string"},"completed_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Completed At"},"convergence_verdict":{"anyOf":[{"enum":["converged","still_improving","too_few_trials"],"type":"string"},{"type":"null"}],"title":"Convergence Verdict"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"direction":{"default":"maximize","enum":["maximize","minimize"],"title":"Direction","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"status":{"enum":["queued","running","completed","cancelled","failed"],"title":"Status","type":"string"},"trial_count":{"default":0,"title":"Trial Count","type":"integer"}},"required":["id","name","cluster_id","status","best_metric","created_at","completed_at"],"title":"StudySummary","type":"object"},"Subsystems":{"description":"Per-subsystem reachability/configuration state. Wire values per spec §7.4.","properties":{"db":{"description":"Postgres reachability","enum":["ok","down"],"title":"Db","type":"string"},"elasticsearch":{"description":"Local Elasticsearch container reachability","enum":["reachable","unreachable"],"title":"Elasticsearch","type":"string"},"elasticsearch_clusters":{"$ref":"#/components/schemas/ClusterAggregateHealth","description":"Aggregate health of user-registered clusters (infra_adapter_elastic Story 3.5 / spec §2). registered=0 → all-zero counts; informational only — does NOT trigger overall `degraded`."},"openai":{"description":"OpenAI key + capability state. 'incapable' added per FR-2 vs. 
spec §7.4 enum table — see implementation_plan.md §13 Review log.","enum":["configured","missing_key","incapable"],"title":"Openai","type":"string"},"opensearch":{"description":"Local OpenSearch container reachability","enum":["reachable","unreachable"],"title":"Opensearch","type":"string"},"redis":{"description":"Redis reachability","enum":["ok","down"],"title":"Redis","type":"string"},"solr":{"default":"not_configured","description":"Local Apache Solr container reachability. 'not_configured' when SOLR_HOST is unset (operator opted out of running the Solr service). Added by infra_adapter_solr Story A10 / spec FR-12a.","enum":["reachable","unreachable","not_configured"],"title":"Solr","type":"string"}},"required":["db","redis","openai","elasticsearch","opensearch","elasticsearch_clusters"],"title":"Subsystems","type":"object"},"SwapTemplateFollowup":{"additionalProperties":false,"description":"A 'swap_template' followup — re-run against a different query template.\n\nCarries the LLM-proposed bounds for params shared with the parent template\nin ``search_space``. 
The digest worker calls\n:func:`backend.app.domain.study.template_swap.remap_search_space_for_swap_target`\nafter parsing to merge these bounds with heuristic defaults for any\nswap-target params not shared with the parent.\n\nOwner: ``feat_digest_executable_followups_swap_template`` (Tier B).","properties":{"kind":{"const":"swap_template","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"$ref":"#/components/schemas/SearchSpace"},"template_id":{"maxLength":36,"minLength":36,"title":"Template Id","type":"string"}},"required":["kind","rationale","template_id","search_space"],"title":"SwapTemplateFollowup","type":"object"},"TargetInfo":{"description":"One target (index / collection) on a cluster.","properties":{"doc_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Doc Count"},"name":{"title":"Name","type":"string"}},"required":["name"],"title":"TargetInfo","type":"object"},"TargetListResponse":{"description":"Response for ``GET /api/v1/clusters/{cluster_id}/targets`` (FR-1).\n\nUnpaginated by design — see feature_spec.md §7.1 \"pagination shape\nrationale\". 
The single-resource lookup pattern matches\n``/clusters/{id}/schema`` rather than the queryable ``/clusters`` list.\n``EntitySelectListPage``'s ``next_cursor`` and ``has_more`` fields\nare optional, so this bare ``data``-only shape consumes correctly on\nthe frontend without pretending to be a cursor endpoint.","properties":{"data":{"items":{"$ref":"#/components/schemas/TargetInfo"},"title":"Data","type":"array"}},"required":["data"],"title":"TargetListResponse","type":"object"},"TextFollowup":{"additionalProperties":false,"description":"A free-form textual suggestion — no auto-prefill, operator interprets.","properties":{"kind":{"const":"text","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"title":"Search Space","type":"null"}},"required":["kind","rationale"],"title":"TextFollowup","type":"object"},"TrialDetail":{"description":"``GET /api/v1/studies/{id}/trials`` response row.","properties":{"duration_ms":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Duration Ms"},"ended_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Ended At"},"error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error"},"id":{"title":"Id","type":"string"},"is_baseline":{"default":false,"title":"Is Baseline","type":"boolean"},"metrics":{"additionalProperties":true,"title":"Metrics","type":"object"},"optuna_trial_number":{"title":"Optuna Trial Number","type":"integer"},"params":{"additionalProperties":true,"title":"Params","type":"object"},"primary_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Primary Metric"},"started_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Started At"},"status":{"enum":["complete","failed","pruned"],"title":"Status","type":"string"},"study_id":{"title":"Study 
Id","type":"string"}},"required":["id","study_id","optuna_trial_number","params","primary_metric","metrics","duration_ms","status","error","started_at","ended_at"],"title":"TrialDetail","type":"object"},"TrialListResponse":{"description":"``GET /api/v1/studies/{id}/trials`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/TrialDetail"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"TrialListResponse","type":"object"},"TrialsSummaryShape":{"description":"The ``trials_summary`` field embedded in :class:`StudyDetail`.","properties":{"best_primary_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Primary Metric"},"complete":{"title":"Complete","type":"integer"},"failed":{"title":"Failed","type":"integer"},"pruned":{"title":"Pruned","type":"integer"},"total":{"title":"Total","type":"integer"}},"required":["total","complete","failed","pruned","best_primary_metric"],"title":"TrialsSummaryShape","type":"object"},"UbiReadinessResponse":{"description":"``GET /api/v1/clusters/{cluster_id}/ubi-readiness`` response (FR-7).\n\n``covered_pairs_pct`` and ``head_covered`` are nullable — MVP2's\nrung classifier uses event-count thresholds (the SearchAdapter\nProtocol doesn't expose an exact ``_count`` endpoint). The fields\nare reserved on the wire so a future ``infra_adapter_count_method``\ncan fill them without breaking the contract. 
See\n:mod:`backend.app.services.ubi_readiness` for the rationale.","properties":{"checked_at":{"format":"date-time","title":"Checked At","type":"string"},"covered_pairs_pct":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Covered Pairs Pct"},"head_covered":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Head Covered"},"rung":{"enum":["rung_0","rung_1","rung_2","rung_3"],"title":"Rung","type":"string"}},"required":["rung","covered_pairs_pct","head_covered","checked_at"],"title":"UbiReadinessResponse","type":"object"},"UpdateQueryRequest":{"additionalProperties":false,"description":"``PATCH /api/v1/query-sets/{set_id}/queries/{query_id}`` body.\n\nWhole-object replace on ``query_metadata`` (NOT deep-merge); explicit\n``null`` removes a nullable field; omitted key = no change. Empty\nbody ``{}`` validates as a no-op (AC-28).\n\n``query_text`` is NOT NULL on the underlying table, so explicit-null\nis rejected by the ``@model_validator`` below (a 422 surfaces sooner\nthan the SQL ``NotNullViolation``).","properties":{"query_metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Query Metadata"},"query_text":{"anyOf":[{"maxLength":4000,"minLength":1,"type":"string"},{"type":"null"}],"title":"Query Text"},"reference_answer":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reference Answer"}},"title":"UpdateQueryRequest","type":"object"},"ValidationError":{"properties":{"ctx":{"title":"Context","type":"object"},"input":{"title":"Input"},"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"title":"Location","type":"array"},"msg":{"title":"Message","type":"string"},"type":{"title":"Error Type","type":"string"}},"required":["loc","msg","type"],"title":"ValidationError","type":"object"},"WidenFollowup":{"additionalProperties":false,"description":"A 'widen' followup — re-run with a broader range than the 
parent.","properties":{"kind":{"const":"widen","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"$ref":"#/components/schemas/SearchSpace"}},"required":["kind","rationale","search_space"],"title":"WidenFollowup","type":"object"},"_ClusterEmbed":{"description":"Inline cluster summary on proposal responses.","properties":{"engine_type":{"title":"Engine Type","type":"string"},"environment":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Environment"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"}},"required":["id","name","engine_type"],"title":"_ClusterEmbed","type":"object"},"_DigestEmbed":{"description":"Inline digest summary on the proposal-detail response.\n\nfeat_digest_executable_followups Story 4.1 — ``suggested_followups`` is\nnow a discriminated-union list (see ``DigestResponse``).","properties":{"generated_at":{"format":"date-time","title":"Generated At","type":"string"},"id":{"title":"Id","type":"string"},"narrative":{"title":"Narrative","type":"string"},"parameter_importance":{"additionalProperties":{"type":"number"},"title":"Parameter Importance","type":"object"},"recommended_config":{"additionalProperties":true,"title":"Recommended Config","type":"object"},"suggested_followups":{"items":{"$ref":"#/components/schemas/FollowupItem"},"title":"Suggested Followups","type":"array"}},"required":["id","narrative","parameter_importance","recommended_config","suggested_followups","generated_at"],"title":"_DigestEmbed","type":"object"},"_SourceBreakdown":{"description":"Source-breakdown sub-shape on :class:`JudgmentListDetail`.\n\nEvolved 2026-05-29 by ``feat_ubi_judgments`` FR-10 — now three terms\n(``llm + human + click == judgment_count``). 
The cycle-2 F6\n\"click folds into human\" contract is superseded the moment UBI ships\nclick rows; the UI's source-breakdown card now renders all three\nbuckets separately so operators see the mix at a glance.","properties":{"click":{"title":"Click","type":"integer"},"human":{"title":"Human","type":"integer"},"llm":{"title":"Llm","type":"integer"}},"required":["llm","human","click"],"title":"_SourceBreakdown","type":"object"},"_StudySummary":{"description":"Inline study summary on the proposal-detail response.","properties":{"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"best_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Best Trial Id"},"id":{"title":"Id","type":"string"},"judgment_list":{"additionalProperties":true,"title":"Judgment List","type":"object"},"name":{"title":"Name","type":"string"},"query_set":{"additionalProperties":true,"title":"Query Set","type":"object"},"status":{"title":"Status","type":"string"}},"required":["id","name","status","best_metric","best_trial_id","query_set","judgment_list"],"title":"_StudySummary","type":"object"},"_TemplateEmbed":{"description":"Inline template summary on proposal responses.","properties":{"engine_type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Engine Type"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"version":{"title":"Version","type":"integer"}},"required":["id","name","version"],"title":"_TemplateEmbed","type":"object"}}},"info":{"description":"Open-source automated relevance tuning for enterprise search platforms","title":"RelyLoop","version":"0.1.0"},"openapi":"3.1.0","paths":{"/api/v1/_test/auto-followup/seed-chain":{"post":{"description":"Test-only endpoint. Returns 404 unless `ENVIRONMENT=development`. Inserts a chain of `depth + 1` studies where each child carries the prior node's id as `parent_study_id`. 
The public POST /studies endpoint does NOT accept `parent_study_id` (it's set only by the auto-followup worker via `repo.create_study(parent_study_id=...)`), so this endpoint is the only way to drive deterministic E2E coverage of chain-panel parent-link / children-table / cascade-radio paths. Closes chore_auto_followup_e2e_chain_seed_helper.","operationId":"seed_auto_followup_chain_endpoint_api_v1__test_auto_followup_seed_chain_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedAutoFollowupChainRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedAutoFollowupChainResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Seed an auto-followup chain of N+1 linked studies","tags":["test-only"]}},"/api/v1/_test/demo/reseed":{"post":{"description":"Enqueues an Arq job that wipes the demo Postgres tables + ES/OS indices, then re-seeds the 4 demo scenarios from ``scripts/seed_meaningful_demos.py`` using REAL studies (real Optuna trials, real metrics per scenario). Returns 202 + an initial ``ReseedStatusResponse`` immediately; the frontend polls ``GET /api/v1/_test/demo/reseed/status`` for progress.\n\nPer ``bug_demo_reseed_fake_metric_regression``. Replaces the previous synchronous path that called ``/_test/studies/seed-completed`` and produced identical ``best_metric=0.487`` rows for every scenario.","operationId":"reseed_demo_api_v1__test_demo_reseed_post","responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReseedStatusResponse"}}},"description":"Successful Response"}},"summary":"Enqueue a demo-state reseed (dev-only, async)","tags":["test-only"]}},"/api/v1/_test/demo/reseed/status":{"get":{"description":"Returns the current reseed status from Redis. 
When no reseed has ever run (or the result TTL'd out), returns ``{status: 'idle'}`` rather than 404 so the frontend's polling loop is trivially safe.","operationId":"reseed_demo_status_api_v1__test_demo_reseed_status_get","responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReseedStatusResponse"}}},"description":"Successful Response"}},"summary":"Poll the current demo-reseed progress (dev-only)","tags":["test-only"]}},"/api/v1/_test/digests/{digest_id}":{"delete":{"description":"FR-2: Hard-delete the digest row. No FK children — no preflight needed.","operationId":"delete_test_digest_api_v1__test_digests__digest_id__delete","parameters":[{"in":"path","name":"digest_id","required":true,"schema":{"title":"Digest Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a digest (test-only)","tags":["test-only"]}},"/api/v1/_test/judgment-lists/{judgment_list_id}":{"delete":{"description":"FR-4 — hard-delete the judgment_list row.\n\nJudgments cascade-delete via existing FK. Preflight-checks ``studies``\n(non-cascade); 409 if any study references the judgment_list.","operationId":"delete_test_judgment_list_api_v1__test_judgment_lists__judgment_list_id__delete","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a judgment_list (test-only)","tags":["test-only"]}},"/api/v1/_test/proposals/{proposal_id}":{"delete":{"description":"FR-1: Hard-delete the proposal row. 
No FK children — no preflight needed.","operationId":"delete_test_proposal_api_v1__test_proposals__proposal_id__delete","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a proposal (test-only)","tags":["test-only"]}},"/api/v1/_test/query-sets/{query_set_id}":{"delete":{"description":"FR-5 — hard-delete the query_set row.\n\nQueries cascade-delete via existing FK. Preflight-checks ``studies``\n+ ``judgment_lists`` (both non-cascade); 409 with resource-specific\ncode if either references.","operationId":"delete_test_query_set_api_v1__test_query_sets__query_set_id__delete","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a query_set (test-only)","tags":["test-only"]}},"/api/v1/_test/query-templates/{template_id}":{"delete":{"description":"FR-6 — hard-delete the query_template row.\n\nNo FK children cascade with template. 
Preflight-checks ``studies``,\n``proposals``, and ``judgment_lists.current_template_id`` in\n**fixed priority order: STUDY > PROPOSAL > JUDGMENT_LIST** (per\nspec §FR-6) — first match wins.","operationId":"delete_test_query_template_api_v1__test_query_templates__template_id__delete","parameters":[{"in":"path","name":"template_id","required":true,"schema":{"title":"Template Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a query_template (test-only)","tags":["test-only"]}},"/api/v1/_test/studies/seed-completed":{"post":{"description":"Test-only endpoint. Returns 404 unless `ENVIRONMENT=development`. Inserts a study (driven through queued → running → completed via the legal state-machine transitions), 2 trials (one winner, one comparison), a digest, and optionally a pending proposal in a single transaction. Used by the Playwright E2E suite to cover the digest-panel surfaces (7 tooltip placements + AC-7 body content + AC-11 Open PR enabled/disabled branches) without waiting on the orchestrator + Optuna workers.","operationId":"seed_completed_study_api_v1__test_studies_seed_completed_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedCompletedStudyRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedCompletedStudyResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Seed a completed study + digest + (optional) pending proposal","tags":["test-only"]}},"/api/v1/_test/studies/{study_id}":{"delete":{"description":"FR-3 — hard-delete the study row.\n\nTrials cascade-delete via existing FK. 
Preflight-checks ``proposals``\n+ ``digests`` (both non-cascade); 409 if any dependent rows reference\nthe study.","operationId":"delete_test_study_api_v1__test_studies__study_id__delete","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a study (test-only)","tags":["test-only"]}},"/api/v1/clusters":{"get":{"description":"List clusters with cursor pagination + ``X-Total-Count`` header.\n\n``?q=`` is a Postgres FTS match against the cluster's ``search_vector``\n(name + base_url); 2–200 chars. Filter-only — ordering unchanged per\nspec FR-1. ``?sort=`` is one of the values in\n:data:`~backend.app.api.v1.schemas.ClusterSortKey`; the cursor is\nsort-aware so the keyset predicate matches the active ORDER BY\n(feat_data_table_primitive Stories 1.2 + 
1.3).","operationId":"list_clusters_api_v1_clusters_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","environment:asc","environment:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}},{"in":"query","name":"engine_type","required":false,"schema":{"anyOf":[{"enum":["elasticsearch","opensearch","solr"],"type":"string"},{"type":"null"}],"title":"Engine Type"}},{"in":"query","name":"environment","required":false,"schema":{"anyOf":[{"enum":["prod","staging","dev"],"type":"string"},{"type":"null"}],"title":"Environment"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Clusters","tags":["clusters"]},"post":{"description":"Register a cluster (FR-5 / AC-1).","operationId":"create_cluster_api_v1_clusters_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateClusterRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDetail"}}},"description":"Successful 
Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Cluster","tags":["clusters"]}},"/api/v1/clusters/test-connection":{"post":{"description":"Probe a cluster config WITHOUT persisting (infra_adapter_solr Story A9).\n\nPowers the registration modal's \"Test connection\" button. Always 200 —\ntransport failures surface as ``reachable=false`` with ``error`` set.\nInvalid engine×auth pairings 400 BEFORE the network call.","operationId":"test_connection_api_v1_clusters_test_connection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConnectionTestRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConnectionTestResult"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Test Connection","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}":{"delete":{"description":"Soft-delete a cluster (AC-8). 
Returns 204 with no body.","operationId":"delete_cluster_api_v1_clusters__cluster_id__delete","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Delete Cluster","tags":["clusters"]},"get":{"description":"Return cluster row + cached/fresh health probe.","operationId":"get_cluster_detail_api_v1_clusters__cluster_id__get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Cluster Detail","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/reprobe":{"post":{"description":"Re-run cluster capability probe (Story A9 / spec FR-2 + AC-14).\n\nConcurrent calls serialize on ``SELECT … FOR UPDATE``. 
On probe failure\nthe row's engine_config is NOT updated (the transaction rolls back).","operationId":"reprobe_cluster_api_v1_clusters__cluster_id__reprobe_post","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Reprobe Cluster","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/run_query":{"post":{"description":"Execute one query DSL fragment against the cluster (FR-6 / AC-3).","operationId":"run_query_api_v1_clusters__cluster_id__run_query_post","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}},{"in":"query","name":"timeout_s","required":false,"schema":{"default":5.0,"maximum":30.0,"minimum":1.0,"title":"Timeout S","type":"number"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunQueryRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunQueryResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Run Query","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/schema":{"get":{"description":"Return the field schema for ``target`` (FR-4 / AC-2).","operationId":"get_cluster_schema_api_v1_clusters__cluster_id__schema_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster 
Id","type":"string"}},{"in":"query","name":"target","required":true,"schema":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Schema"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Cluster Schema","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/targets":{"get":{"description":"List targets (indices/collections) on the cluster (FR-1 / AC-1).\n\nThin passthrough to ``ElasticAdapter.list_targets()`` (which filters out\nsystem indices whose names start with ``.``). Mirrors the ``get_cluster_schema``\npattern: ``get_cluster`` → ``acquire_adapter`` async context → adapter call\n→ translate exceptions via the ``_err()`` helper to the spec §7.5 envelope.\n\nError mapping:\n* cluster missing or soft-deleted → 404 ``CLUSTER_NOT_FOUND`` (retryable=false)\n* adapter raises ``TargetsForbiddenError`` (ACL 401/403) → 403\n ``TARGETS_FORBIDDEN`` (retryable=false) — frontend auto-engages manual mode\n* adapter raises ``ClusterUnreachableError`` (5xx / connection failure) → 503\n ``CLUSTER_UNREACHABLE`` (retryable=true)","operationId":"list_cluster_targets_api_v1_clusters__cluster_id__targets_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TargetListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Cluster Targets","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/targets/{target}/documents":{"get":{"description":"Paginated _id + truncated _source preview for a target (FR-3).\n\nThe endpoint asks the adapter for ``limit + 
1`` rows so it can detect\nend-of-data exactly (no extra round-trip). Only the first ``limit`` rows\nare returned; ``next_cursor`` encodes the ES ``hits[i].sort`` of the\nlast visible row when ``has_more`` is True. ``X-Total-Count`` header\ncarries the engine's ``hits.total.value``.","operationId":"list_target_documents_api_v1_clusters__cluster_id__targets__target__documents_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}},{"in":"path","name":"target","required":true,"schema":{"title":"Target","type":"string"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"maxLength":4096,"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":25,"maximum":100,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"fields","required":false,"schema":{"anyOf":[{"maxLength":2048,"type":"string"},{"type":"null"}],"title":"Fields"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DocumentListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Target Documents","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/targets/{target}/documents/{doc_id}":{"get":{"description":"Fetch one document by ``_id`` (FR-4).\n\nFastAPI's ``{doc_id:path}`` converter round-trips slashes verbatim, so\noperator IDs containing ``/`` are supported (D-17 / AC-16). 
Returns the\nadapter ``Document`` shape directly; on ``found: false`` returns 404\n``DOCUMENT_NOT_FOUND`` (distinct from ``TARGET_NOT_FOUND``).","operationId":"get_target_document_api_v1_clusters__cluster_id__targets__target__documents__doc_id__get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}},{"in":"path","name":"target","required":true,"schema":{"title":"Target","type":"string"}},{"in":"path","name":"doc_id","required":true,"schema":{"title":"Doc Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Document"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Target Document","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/ubi-readiness":{"get":{"description":"Classify ``(cluster, query_set, target)`` on the UBI rung ladder.\n\nfeat_ubi_judgments FR-7.\n\nRequired query params: ``query_set_id`` + ``target`` (Spec FR-7 +\ncycle-3 D-10c: the endpoint MUST 422 without them — the classifier\ncan't compute a per-target rung without an application filter).\n\nError envelopes (all per spec §7.5):\n* ``404 CLUSTER_NOT_FOUND`` — cluster row missing or soft-deleted.\n* ``404 QUERY_SET_NOT_FOUND`` — query set row missing.\n* ``422 VALIDATION_ERROR`` — missing required query params (FastAPI's\n built-in handler, surfaces via ``api/errors.py``).\n* ``503 CLUSTER_UNREACHABLE`` — adapter cannot reach the cluster.\n\nThe result is cached for 60 s in Redis per\n``(cluster_id, query_set_id, target)`` so back-to-back dialog-open\nand dialog-submit calls don't re-probe.","operationId":"get_cluster_ubi_readiness_api_v1_clusters__cluster_id__ubi_readiness_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster 
Id","type":"string"}},{"in":"query","name":"query_set_id","required":true,"schema":{"maxLength":36,"minLength":1,"title":"Query Set Id","type":"string"}},{"in":"query","name":"target","required":true,"schema":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UbiReadinessResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Cluster Ubi Readiness","tags":["clusters"]}},"/api/v1/config-repos":{"get":{"description":"Cursor-paginated config-repo list, newest first.","operationId":"list_config_repos_endpoint_api_v1_config_repos_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConfigReposListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Config Repos Endpoint","tags":["config-repos"]},"post":{"description":"Register a new config repo. ``provider`` is server-derived from ``repo_url``.\n\nPreflight order matches spec FR-3:\n\n1. ``validate_repo_url(repo_url)`` → 400 ``UNSUPPORTED_PROVIDER`` for\n non-GitHub URLs (AC-8). GitLab + Bitbucket arrive at MVP3.\n2. ``./secrets/{auth_ref}`` must exist → else 400 ``AUTH_REF_NOT_FOUND``\n (AC-9). The contents check defers to the worker — operators may\n populate the file between registration and first PR-open.\n3. ``name`` uniqueness check → 409 ``CONFIG_REPO_NAME_TAKEN`` on collision.\n4. 
Insert with server-derived ``provider=\"github\"``.\n5. **feat_github_webhook Story 4.2** — when ``webhook_secret_ref`` is\n populated, best-effort enqueue ``register_webhook`` against the\n newly created config_repo id. Enqueue failure (Redis down, pool\n absent, transient blip) does NOT break the 201 — it logs WARN\n and the operator drives recovery via the runbook.","operationId":"create_config_repo_endpoint_api_v1_config_repos_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateConfigRepoRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConfigRepoDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Config Repo Endpoint","tags":["config-repos"]}},"/api/v1/config-repos/{config_repo_id}":{"get":{"description":"Detail by id; 404 ``CONFIG_REPO_NOT_FOUND`` if missing.\n\nfeat_config_repo_baseline_tracking FR-4 — when\n``last_merged_proposal_id`` is set, embed the pointed-at proposal as a\n:class:`ProposalSummary` with ``is_currently_live=True``. 
The embed-side\nderivation uses the pointer context directly (NOT the generic\n``proposals → clusters → config_repos`` JOIN used elsewhere) so the\nbadge renders correctly even when the proposal's cluster was later\nunwired from this config_repo (spec §19 \"Cluster-with-config_repo-\nrotated\" decision-log entry).","operationId":"get_config_repo_endpoint_api_v1_config_repos__config_repo_id__get","parameters":[{"in":"path","name":"config_repo_id","required":true,"schema":{"title":"Config Repo Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConfigRepoDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Config Repo Endpoint","tags":["config-repos"]}},"/api/v1/conversations":{"get":{"description":"List conversations newest-first with per-row message_count + X-Total-Count header.\n\n``?since=`` (Story 1.5 — closes api-conventions.md drift) filters by\n``created_at >= since``. 
``?q=`` (Story 1.2) is a Postgres FTS match\nagainst ``search_vector`` (coalesce(title, '')); 2-200 chars.","operationId":"list_conversations_endpoint_api_v1_conversations_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConversationsListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Conversations Endpoint","tags":["conversations"]},"post":{"description":"Create a new conversation. 
Title is optional (FR-1 auto-generates from first message).","operationId":"create_conversation_endpoint_api_v1_conversations_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateConversationRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConversationSummary"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Conversation Endpoint","tags":["conversations"]}},"/api/v1/conversations/{conversation_id}":{"delete":{"description":"Soft-delete the conversation; subsequent reads return 404.","operationId":"delete_conversation_endpoint_api_v1_conversations__conversation_id__delete","parameters":[{"in":"path","name":"conversation_id","required":true,"schema":{"title":"Conversation Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Delete Conversation Endpoint","tags":["conversations"]},"get":{"description":"Return the conversation's full message history.","operationId":"get_conversation_endpoint_api_v1_conversations__conversation_id__get","parameters":[{"in":"path","name":"conversation_id","required":true,"schema":{"title":"Conversation Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConversationDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Conversation Endpoint","tags":["conversations"]}},"/api/v1/conversations/{conversation_id}/messages":{"post":{"description":"Send a user message and stream the assistant turn as SSE.\n\nPreflight 
(in order; returns plain JSON envelope, NOT a partial stream):\n A. Conversation exists → else 404 ``CONVERSATION_NOT_FOUND``.\n B. ``Settings.openai_api_key`` populated → else 503 ``OPENAI_NOT_CONFIGURED``.\n C. Daily budget peek under cap → else 503 ``OPENAI_BUDGET_EXCEEDED``.\n\nSuccessful preflight returns a ``StreamingResponse(text/event-stream)``\ndriven by :func:`agent_chat.send_user_message`.","operationId":"post_message_endpoint_api_v1_conversations__conversation_id__messages_post","parameters":[{"in":"path","name":"conversation_id","required":true,"schema":{"title":"Conversation Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SendMessageRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Post Message Endpoint","tags":["conversations"]}},"/api/v1/judgment-lists":{"get":{"description":"List judgment lists, newest-first with cursor pagination.\n\n``?since=`` filters by ``created_at >= since`` (Story 1.5). ``?q=`` FTS\nmatch against ``search_vector`` (name + target). 
``?sort=`` is a\n:data:`JudgmentListSortKey` value with sort-aware cursor (Story 1.3).\n``?query_set_id`` / ``?cluster_id`` filter to lists belonging to the\nsupplied parent (``bug_judgment_lists_listing_ignores_query_set_filter``\n— required by the create-study modal's Step-2 dropdown so the user\ncan only pick judgment-lists valid for the chosen query-set + cluster;\nwithout these filters the modal returns all rows and the user can\npick a mismatched pair, which the ``POST /api/v1/studies`` cross-\nentity integrity check then rejects at create time with a confusing\n422 ``VALIDATION_ERROR: \"judgment_list query_set_id does not match\nstudy query_set_id\"``).\n\n``?target=`` filters by exact target index/collection name\n(``feat_study_target_judgment_mismatch_guard`` FR-2 — pairs with the\n``POST /studies`` ``JUDGMENT_TARGET_MISMATCH`` 422 so the create-study\nmodal can pre-filter the dropdown to only lists matching the chosen\nstudy target). Bounded by the ES/OpenSearch index-name ceiling\n(255 bytes).","operationId":"list_judgment_lists_endpoint_api_v1_judgment_lists_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","status:asc","status:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}},{"in":"query","name":"query_set_id","required":false,"schema":{"anyOf":[{"maxLength":36,"minLength":1,"type":"string"},{"type":"null"}],"title":"Query Set 
Id"}},{"in":"query","name":"cluster_id","required":false,"schema":{"anyOf":[{"maxLength":36,"minLength":1,"type":"string"},{"type":"null"}],"title":"Cluster Id"}},{"in":"query","name":"target","required":false,"schema":{"anyOf":[{"maxLength":255,"minLength":1,"type":"string"},{"type":"null"}],"title":"Target"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Judgment Lists Endpoint","tags":["judgments"]}},"/api/v1/judgment-lists/import":{"post":{"description":"Create a judgment_lists row with status='complete' + bulk-insert judgments.\n\nTutorial path; no OpenAI involvement. Every supplied judgment must\nreference a ``query_id`` that exists in ``body.query_set_id`` —\nmismatches → 400 ``QUERY_NOT_IN_SET``.","operationId":"import_judgment_list_api_v1_judgment_lists_import_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportJudgmentListRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Import Judgment List","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}":{"get":{"operationId":"get_judgment_list_endpoint_api_v1_judgment_lists__judgment_list_id__get","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListDetail"}}},"description":"Successful 
Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Judgment List Endpoint","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}/calibration":{"post":{"description":"Compute Cohen's + weighted kappa from supplied human samples.\n\nPairs are built by joining each sample with the existing\n``source='llm'`` judgment at ``(query_id, doc_id)`` — overridden rows\n(``source='human'``) are excluded (per spec FR-5 + GPT-5.5 cycle 1 F12).","operationId":"calibrate_judgment_list_api_v1_judgment_lists__judgment_list_id__calibration_post","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CalibrationSamplesRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CalibrationResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Calibrate Judgment List","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}/judgments":{"get":{"description":"List per-list judgments with cursor pagination.\n\n``?sort=`` is :data:`JudgmentRowSortKey` with sort-aware cursor\n(feat_data_table_primitive Story 1.3).","operationId":"list_judgments_endpoint_api_v1_judgment_lists__judgment_list_id__judgments_get","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List 
Id","type":"string"}},{"in":"query","name":"source","required":false,"schema":{"anyOf":[{"enum":["llm","human","click"],"type":"string"},{"type":"null"}],"title":"Source"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["created_at:asc","created_at:desc","rating:asc","rating:desc","source:asc","source:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListJudgmentsResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Judgments Endpoint","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}/judgments/{judgment_id}":{"patch":{"description":"Replace an LLM rating with a human override (UPSERT-replace).","operationId":"override_judgment_api_v1_judgment_lists__judgment_list_id__judgments__judgment_id__patch","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}},{"in":"path","name":"judgment_id","required":true,"schema":{"title":"Judgment Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OverrideJudgmentRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentRow"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Override 
Judgment","tags":["judgments"]}},"/api/v1/judgments/generate":{"post":{"description":"Create a judgment_lists row + enqueue the worker.\n\nDelegates the full preflight + INSERT + Arq enqueue to\n:func:`backend.app.services.agent_judgments_dispatch.start_judgment_generation`\nso the chat-agent ``generate_judgments_llm`` tool reuses the exact same\nchecks (no duplicated preflight). Wire behavior is identical — same error\ncodes, same status codes, same response shape.","operationId":"generate_judgments_api_v1_judgments_generate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateJudgmentListGenerateRequest"}}},"required":true},"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateJudgmentsResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Generate Judgments","tags":["judgments"]}},"/api/v1/judgments/generate-from-ubi":{"post":{"description":"Start a UBI-derived judgment generation job.\n\nDelegates to\n:func:`backend.app.services.agent_judgments_dispatch.start_ubi_judgment_generation`\nwhich runs the full FR-4 preflight (U-A..U-H) before INSERT + Arq\nenqueue. 
The Pydantic ``model_validator`` on\n:class:`CreateJudgmentListFromUbiRequest` already enforces the\nhybrid conditional (``current_template_id`` + ``rubric`` required\niff ``converter == 'hybrid_ubi_llm'``); the dispatcher trusts the\nvalidated request.","operationId":"generate_judgments_from_ubi_api_v1_judgments_generate_from_ubi_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateJudgmentListFromUbiRequest"}}},"required":true},"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateJudgmentsResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Generate Judgments From Ubi","tags":["judgments"]}},"/api/v1/proposals":{"get":{"description":"List proposals with cursor pagination + filters.\n\n``?template_id=`` (Story 1.5) filters by ``proposals.template_id`` FK;\n``?study_id=`` filters by ``proposals.study_id`` FK (used by the\nstudy-detail page's pending-proposal lookup). Both reject invalid\nUUIDs with 422 via FastAPI's UUID parsing. 
``?sort=`` (Story 1.3) is\na :data:`ProposalSortKey` value with sort-aware cursor.","operationId":"list_proposals_endpoint_api_v1_proposals_get","parameters":[{"in":"query","name":"status","required":false,"schema":{"anyOf":[{"enum":["pending","pr_opened","pr_merged","rejected"],"type":"string"},{"type":"null"}],"title":"Status"}},{"in":"query","name":"cluster_id","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cluster Id"}},{"in":"query","name":"source","required":false,"schema":{"anyOf":[{"enum":["study","manual"],"type":"string"},{"type":"null"}],"title":"Source"}},{"in":"query","name":"template_id","required":false,"schema":{"anyOf":[{"format":"uuid","type":"string"},{"type":"null"}],"title":"Template Id"}},{"in":"query","name":"study_id","required":false,"schema":{"anyOf":[{"format":"uuid","type":"string"},{"type":"null"}],"title":"Study Id"}},{"in":"query","name":"is_last_merged","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Last Merged"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["created_at:asc","created_at:desc","status:asc","status:desc","pr_state:asc","pr_state:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalsListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Proposals Endpoint","tags":["proposals"]},"post":{"description":"Manually create a proposal (chat-agent hand-crafted tweaks).\n\n``study_id`` and ``study_trial_id`` are NULL for manual 
proposals.\nValidates FK targets (cluster + template exist) before insert.","operationId":"create_manual_proposal_api_v1_proposals_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateProposalRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Manual Proposal","tags":["proposals"]}},"/api/v1/proposals/{proposal_id}":{"get":{"operationId":"get_proposal_endpoint_api_v1_proposals__proposal_id__get","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Proposal Endpoint","tags":["proposals"]}},"/api/v1/proposals/{proposal_id}/open_pr":{"post":{"description":"Enqueue the ``open_pr`` worker for an operator-approved proposal.\n\nDelegates the full preflight + Arq enqueue to\n:func:`backend.app.services.agent_proposals_dispatch.open_pr` so the\nchat-agent ``open_pr`` tool reuses the same checks. 
Wire behavior is\nidentical — same error codes, status codes, response shape.","operationId":"open_pr_endpoint_api_v1_proposals__proposal_id__open_pr_post","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenPrResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Open Pr Endpoint","tags":["proposals"]}},"/api/v1/proposals/{proposal_id}/reject":{"post":{"description":"AC-5: ``pending → rejected`` transition; 409 INVALID_STATE_TRANSITION otherwise.","operationId":"reject_proposal_endpoint_api_v1_proposals__proposal_id__reject_post","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RejectProposalRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Reject Proposal Endpoint","tags":["proposals"]}},"/api/v1/query-sets":{"get":{"description":"List query sets with cursor pagination + X-Total-Count.\n\n``?q=`` is FTS match against ``search_vector`` (name). 
``?sort=`` is a\n:data:`QuerySetSortKey` value; cursor is sort-aware.","operationId":"list_query_sets_api_v1_query_sets_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QuerySetListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Query Sets","tags":["query-sets"]},"post":{"description":"Register a query set under a cluster (FR-3).","operationId":"create_query_set_api_v1_query_sets_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateQuerySetRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QuerySetDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Query Set","tags":["query-sets"]}},"/api/v1/query-sets/{query_set_id}":{"get":{"description":"Return a query set by id (includes 
``query_count``).","operationId":"get_query_set_detail_api_v1_query_sets__query_set_id__get","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QuerySetDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Query Set Detail","tags":["query-sets"]}},"/api/v1/query-sets/{query_set_id}/queries":{"get":{"description":"List per-query rows under a query set, with derived ``judgment_count``.","operationId":"list_queries_in_set_api_v1_query_sets__query_set_id__queries_get","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Queries In Set","tags":["query-sets"]},"post":{"description":"Bulk-add queries to a set (FR-3 + AC-8).\n\nDispatches on Content-Type:\n\n* ``application/json`` → :class:`BulkQueriesJsonRequest` Pydantic-parse.\n* ``text/csv`` → :func:`parse_queries_csv` (AC-8).\n\nOther content types → 415-equivalent surfaced as 400 ``INVALID_CSV``\n(the documented error code for content-type-mismatch in spec 
§7.5).","operationId":"bulk_add_queries_api_v1_query_sets__query_set_id__queries_post","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}}],"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/BulkQueriesResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Bulk Add Queries","tags":["query-sets"]}},"/api/v1/query-sets/{query_set_id}/queries/{query_id}":{"delete":{"description":"Hard-delete a query. FK-guarded — 409 if any judgment references it.","operationId":"delete_query_endpoint_api_v1_query_sets__query_set_id__queries__query_id__delete","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}},{"in":"path","name":"query_id","required":true,"schema":{"title":"Query Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"409":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryHasJudgmentsEnvelope"}}},"description":"Conflict"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Delete Query Endpoint","tags":["query-sets"]},"patch":{"description":"Partial-update a query. 
Whole-object replace on ``query_metadata``.","operationId":"update_query_endpoint_api_v1_query_sets__query_set_id__queries__query_id__patch","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}},{"in":"path","name":"query_id","required":true,"schema":{"title":"Query Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateQueryRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryRow"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Update Query Endpoint","tags":["query-sets"]}},"/api/v1/query-templates":{"get":{"description":"List query templates with cursor pagination + X-Total-Count header.\n\n``?q=`` FTS match (name). ``?sort=`` sort-aware cursor (Story 1.3).\n``?engine_type=`` filters by engine (Story 
1.4).","operationId":"list_query_templates_api_v1_query_templates_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","engine_type:asc","engine_type:desc","version:asc","version:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}},{"in":"query","name":"engine_type","required":false,"schema":{"anyOf":[{"enum":["elasticsearch","opensearch","solr"],"type":"string"},{"type":"null"}],"title":"Engine Type"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryTemplateListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Query Templates","tags":["query-templates"]},"post":{"description":"Register a query template (FR-2 + AC-7).\n\nAC-7: a body containing ``{{ os.system('rm -rf /') }}`` surfaces as\n400 ``INVALID_TEMPLATE_SYNTAX`` (the AST walk catches the ``Call``\nnode before reaching the meta-vars cross-check that would otherwise\nclassify ``os`` as 
``UndeclaredParamUsed``).","operationId":"create_query_template_api_v1_query_templates_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateQueryTemplateRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryTemplateDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Query Template","tags":["query-templates"]}},"/api/v1/query-templates/{template_id}":{"get":{"description":"Return a query template by id.","operationId":"get_query_template_detail_api_v1_query_templates__template_id__get","parameters":[{"in":"path","name":"template_id","required":true,"schema":{"title":"Template Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryTemplateDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Query Template Detail","tags":["query-templates"]}},"/api/v1/studies":{"get":{"description":"List studies with cursor pagination + X-Total-Count.\n\n``?status=`` is typed as :data:`StudyStatusWire` so FastAPI returns\n422 ``VALIDATION_ERROR`` for unsupported values. ``?q=`` is a Postgres\nFTS match against ``search_vector`` (name + target). ``?sort=`` is a\n:data:`StudySortKey` value (``:``); the cursor is\nsort-aware (feat_data_table_primitive Stories 1.2 + 1.3).\n\n``?target=`` (feat_index_document_browser FR-5) scopes the list to\nstudies targeting a single index/collection. 
Composes with all other\nfilters via AND.","operationId":"list_studies_api_v1_studies_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"status","required":false,"schema":{"anyOf":[{"enum":["queued","running","completed","cancelled","failed"],"type":"string"},{"type":"null"}],"title":"Status"}},{"in":"query","name":"cluster_id","required":false,"schema":{"anyOf":[{"maxLength":36,"minLength":1,"type":"string"},{"type":"null"}],"title":"Cluster Id"}},{"in":"query","name":"target","required":false,"schema":{"anyOf":[{"maxLength":256,"minLength":1,"type":"string"},{"type":"null"}],"title":"Target"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","completed_at:asc","completed_at:desc","best_metric:asc","best_metric:desc","status:asc","status:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Studies","tags":["studies"]},"post":{"description":"Create a study (FR-1 + AC-1) and enqueue the orchestrator 
job.","operationId":"create_study_api_v1_studies_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateStudyRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Study","tags":["studies"]}},"/api/v1/studies/{study_id}":{"get":{"description":"Return a study by id (includes ``trials_summary``).","operationId":"get_study_detail_api_v1_studies__study_id__get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Study Detail","tags":["studies"]}},"/api/v1/studies/{study_id}/cancel":{"post":{"description":"Cancel a study (Story 2.3, FR-8 + AC-8/AC-9).\n\nOptionally cascades to in-flight chain children.\n\n``?cascade=true`` (default): routes through\n:func:`services.study_state.cancel_study_with_chain_cascade` —\ncancels the parent (if in-flight) AND recursively cancels in-flight\ndescendants. 
Tolerates terminal parents (recurses through completed\nintermediates to reach an in-flight grandchild).\n\n``?cascade=false``: routes through the original\n:func:`services.study_state.cancel_study` — single-study cancel,\npreserves the existing 409 error contract on terminal parents\n(AC-9 wire contract).","operationId":"cancel_study_api_v1_studies__study_id__cancel_post","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}},{"in":"query","name":"cascade","required":false,"schema":{"default":"true","title":"Cascade","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Cancel Study","tags":["studies"]}},"/api/v1/studies/{study_id}/chain":{"get":{"description":"Return the rolled-up chain summary for the study and its lineage (FR-3).\n\nWalks to the chain anchor, aggregates the completed-link subset into a\nbest link + cumulative lift + derived stop reason, and emits per-link\ndeltas. 
The anchor's ``delta_from_prev`` is always ``None`` (spec §8.3).\nReturns ``404 STUDY_NOT_FOUND`` when the study does not exist.","operationId":"get_study_chain_api_v1_studies__study_id__chain_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyChainResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Study Chain","tags":["studies"]}},"/api/v1/studies/{study_id}/children":{"get":{"description":"List direct child studies of a parent (FR-10 + D-13).\n\nReturns ``{\"data\": [], \"next_cursor\": null}`` for a study with no\nchildren — empty data array, NOT 404. 404 only fires when the parent\nstudy itself is missing.\n\nPer D-13 (direct-children-only): does NOT return transitive\ndescendants. 
The chain panel renders parent ↑ + direct children ↓;\noperators walk lineage one hop per page navigation.","operationId":"list_study_children_api_v1_studies__study_id__children_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Study Children","tags":["studies"]}},"/api/v1/studies/{study_id}/digest":{"get":{"description":"Fetch the digest for a completed study.\n\nReturns 404 ``DIGEST_NOT_READY`` (``retryable=true``) when:\n- the study is not in ``status='completed'``, OR\n- the study is completed but the worker hasn't written the digest yet\n (worker lag, or a worker-side terminal failure like\n ``OPENAI_NOT_CONFIGURED`` deferred the run).","operationId":"get_study_digest_api_v1_studies__study_id__digest_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DigestResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Study Digest","tags":["digests"]}},"/api/v1/studies/{study_id}/trials":{"get":{"description":"List trials in a study (FR-6).\n\nSort variants per spec §7.4: ``primary_metric_desc`` (default),\n``primary_metric_asc``, ``ended_at_desc``, ``ended_at_asc``,\n``optuna_trial_number_asc``.","operationId":"list_study_trials_api_v1_studies__study_id__trials_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study 
Id","type":"string"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"sort","required":false,"schema":{"default":"primary_metric_desc","title":"Sort","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TrialListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Study Trials","tags":["trials"]}},"/healthz":{"get":{"description":"Probe each subsystem in parallel and return the documented JSON shape.\n\nArgs:\n settings: Application settings (DB URL, ES/OS URLs, OpenAI base URL, etc.)\n redis_client: Redis client for ping probe + capability-cache read\n es_client: shared httpx client for ES + OpenSearch HTTP probes\n db: Async DB session for the registered-clusters aggregate (Story 3.5)\n\nReturns:\n JSONResponse with the HealthResponse body and HTTP 200 (healthy) or 503 (degraded).","operationId":"healthz_healthz_get","responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}},"description":"Successful Response"},"503":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}},"description":"One or more required subsystems is down"}},"summary":"Healthz","tags":["operator"]}},"/webhooks/github":{"post":{"description":"Receive a single GitHub webhook delivery.\n\nReturns ``{\"status\": \"ok\", \"action\": }`` where\n``wire_action`` is one of the four values in\n:data:`WEBHOOK_ACTION_VALUES`.\n\nRaises:\n HTTPException(403, 
INVALID_SIGNATURE): bad signature or unknown\n repository. Both share one error code so the receiver does\n not reveal repo enumeration.","operationId":"github_webhook_webhooks_github_post","responses":{"200":{"content":{"application/json":{"schema":{"additionalProperties":{"type":"string"},"title":"Response Github Webhook Webhooks Github Post","type":"object"}}},"description":"Successful Response"}},"summary":"Github Webhook","tags":["webhooks"]}}}} +{"components":{"schemas":{"BulkQueriesResponse":{"description":"``POST /api/v1/query-sets/{id}/queries`` response.","properties":{"added":{"title":"Added","type":"integer"}},"required":["added"],"title":"BulkQueriesResponse","type":"object"},"CIShape":{"description":"Bootstrap percentile CI on the winner's per-query metric values.","properties":{"high":{"title":"High","type":"number"},"low":{"title":"Low","type":"number"},"method":{"const":"bootstrap_n1000","title":"Method","type":"string"},"n_samples":{"title":"N Samples","type":"integer"}},"required":["low","high","method","n_samples"],"title":"CIShape","type":"object"},"CalibrationResponse":{"description":"Calibration endpoint response.\n\nMirrors :class:`backend.app.eval.calibration.CalibrationResult` —\npersisted as ``judgment_lists.calibration`` JSONB.","properties":{"cohens_kappa":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Cohens Kappa"},"n_samples":{"title":"N Samples","type":"integer"},"per_class":{"additionalProperties":{"type":"number"},"title":"Per Class","type":"object"},"warning":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Warning"},"weighted_kappa":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weighted Kappa"}},"required":["cohens_kappa","weighted_kappa","per_class","n_samples","warning"],"title":"CalibrationResponse","type":"object"},"CalibrationSample":{"description":"One row in :class:`CalibrationSamplesRequest`.","properties":{"doc_id":{"maxLength":512,"minLength":1,"title":"Doc 
Id","type":"string"},"query_id":{"maxLength":36,"minLength":1,"title":"Query Id","type":"string"},"rating":{"enum":[0,1,2,3],"title":"Rating","type":"integer"}},"required":["query_id","doc_id","rating"],"title":"CalibrationSample","type":"object"},"CalibrationSamplesRequest":{"description":"Body for ``POST /api/v1/judgment-lists/{id}/calibration`` (Story 3.5).","properties":{"human_samples":{"items":{"$ref":"#/components/schemas/CalibrationSample"},"minItems":1,"title":"Human Samples","type":"array"}},"required":["human_samples"],"title":"CalibrationSamplesRequest","type":"object"},"CategoricalParam":{"additionalProperties":false,"description":"Discrete choice parameter.\n\nOptuna ``suggest_categorical`` handles strings, ints, floats, and bools\nas choices.","properties":{"choices":{"items":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"},{"type":"boolean"}]},"minItems":1,"title":"Choices","type":"array"},"type":{"const":"categorical","title":"Type","type":"string"}},"required":["type","choices"],"title":"CategoricalParam","type":"object"},"ClusterAggregateHealth":{"description":"Aggregate counts for the ``elasticsearch_clusters`` /healthz field (Story 3.5).\n\nPer spec §2: probes only the *registered* user clusters (from the DB),\nNOT the local Compose ES/OpenSearch — those have their own subsystem\nfields. 
``status`` is a count derived from the cached ``cluster:health:*``\nentries; missing-cache or red/unreachable clusters are counted as\n``unreachable``.","properties":{"healthy":{"title":"Healthy","type":"integer"},"registered":{"title":"Registered","type":"integer"},"unreachable":{"title":"Unreachable","type":"integer"}},"required":["registered","healthy","unreachable"],"title":"ClusterAggregateHealth","type":"object"},"ClusterDetail":{"description":"``GET /api/v1/clusters/{id}`` response.","properties":{"auth_kind":{"enum":["es_apikey","es_basic","opensearch_basic","opensearch_sigv4","solr_basic","solr_apikey"],"title":"Auth Kind","type":"string"},"base_url":{"title":"Base Url","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"engine_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Config"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"environment":{"enum":["prod","staging","dev"],"title":"Environment","type":"string"},"health_check":{"$ref":"#/components/schemas/HealthCheckResult"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"notes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Notes"},"target_filter":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Target Filter"}},"required":["id","name","engine_type","environment","base_url","auth_kind","created_at","health_check"],"title":"ClusterDetail","type":"object"},"ClusterListResponse":{"description":"Paginated list response.","properties":{"data":{"items":{"$ref":"#/components/schemas/ClusterSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ClusterListResponse","type":"object"},"ClusterSummary":{"description":"List-view; drops engine_config + notes for 
brevity.","properties":{"auth_kind":{"enum":["es_apikey","es_basic","opensearch_basic","opensearch_sigv4","solr_basic","solr_apikey"],"title":"Auth Kind","type":"string"},"base_url":{"title":"Base Url","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"environment":{"enum":["prod","staging","dev"],"title":"Environment","type":"string"},"health_check":{"$ref":"#/components/schemas/HealthCheckResult"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"target_filter":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Target Filter"}},"required":["id","name","engine_type","environment","base_url","auth_kind","created_at","health_check"],"title":"ClusterSummary","type":"object"},"ConfidenceShape":{"description":"The top-level shape exposed via ``StudyDetail.confidence``.\n\nEvery sub-field is independently nullable per FR-7 — degraded paths\nsuppress only the sub-fields they affect, never the whole shape (the\norchestrator returns whole-object ``None`` only when the winner trial\nrow itself is missing).","properties":{"ci_95":{"anyOf":[{"$ref":"#/components/schemas/CIShape"},{"type":"null"}]},"convergence":{"anyOf":[{"$ref":"#/components/schemas/ConvergenceShape"},{"type":"null"}]},"headline":{"$ref":"#/components/schemas/HeadlineShape"},"late_trial_stddev":{"anyOf":[{"$ref":"#/components/schemas/LateTrialStddevShape"},{"type":"null"}]},"per_query_outcomes":{"anyOf":[{"$ref":"#/components/schemas/PerQueryOutcomesShape"},{"type":"null"}]},"runner_up_gap":{"anyOf":[{"$ref":"#/components/schemas/RunnerUpGapShape"},{"type":"null"}]}},"required":["headline","ci_95","runner_up_gap","late_trial_stddev","convergence","per_query_outcomes"],"title":"ConfidenceShape","type":"object"},"ConfigRepoDetail":{"description":"``GET /api/v1/config-repos/{id}`` response + ``POST`` 201 
body.","properties":{"auth_ref":{"title":"Auth Ref","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"default_branch":{"title":"Default Branch","type":"string"},"id":{"title":"Id","type":"string"},"last_merged_proposal":{"anyOf":[{"$ref":"#/components/schemas/ProposalSummary"},{"type":"null"}]},"name":{"title":"Name","type":"string"},"pr_base_branch":{"title":"Pr Base Branch","type":"string"},"provider":{"const":"github","title":"Provider","type":"string"},"repo_url":{"title":"Repo Url","type":"string"},"webhook_registration_error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Webhook Registration Error"},"webhook_secret_ref":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Webhook Secret Ref"}},"required":["id","name","provider","repo_url","default_branch","pr_base_branch","auth_ref","webhook_secret_ref","webhook_registration_error","created_at"],"title":"ConfigRepoDetail","type":"object"},"ConfigReposListResponse":{"description":"``GET /api/v1/config-repos`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/ConfigRepoDetail"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ConfigReposListResponse","type":"object"},"ConnectionTestRequest":{"description":"Body for ``POST /api/v1/clusters/test-connection`` (infra_adapter_solr Story A9).\n\nSame shape as ``CreateClusterRequest`` minus the persisted-only fields\n(``name``, ``environment``, ``notes``, ``target_filter``). 
``engine_type``\n+ ``auth_kind`` are typed as ``str`` (not Literal) so a bad value yields\nthe project-standard 400 envelope rather than a raw 422 — same convention\nas ``CreateClusterRequest``.","properties":{"auth_kind":{"maxLength":64,"minLength":1,"title":"Auth Kind","type":"string"},"base_url":{"maxLength":512,"minLength":1,"title":"Base Url","type":"string"},"credentials_ref":{"maxLength":128,"minLength":1,"title":"Credentials Ref","type":"string"},"engine_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Config"},"engine_type":{"maxLength":64,"minLength":1,"title":"Engine Type","type":"string"}},"required":["engine_type","base_url","auth_kind","credentials_ref"],"title":"ConnectionTestRequest","type":"object"},"ConnectionTestResult":{"description":"Response for ``POST /api/v1/clusters/test-connection``.\n\nAlways 200 — reachable vs unreachable surfaces via ``reachable`` +\n``status`` fields. The endpoint is a diagnostic, never a mutation,\nso it never returns 503; invalid engine×auth pairings 400 BEFORE the\nnetwork call. 
(Cycle-delta F1.)","properties":{"engine_capabilities":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Capabilities"},"error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error"},"reachable":{"title":"Reachable","type":"boolean"},"status":{"enum":["green","yellow","red","unreachable"],"title":"Status","type":"string"},"version":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Version"}},"required":["reachable","status"],"title":"ConnectionTestResult","type":"object"},"ConvergenceShape":{"description":"Where the winner sits in the Optuna trial sequence + the classified regime.","properties":{"best_at_trial":{"title":"Best At Trial","type":"integer"},"regime":{"enum":["early_held","late_rising","noisy"],"title":"Regime","type":"string"},"total_trials":{"title":"Total Trials","type":"integer"}},"required":["best_at_trial","total_trials","regime"],"title":"ConvergenceShape","type":"object"},"ConversationDetail":{"description":"``GET /api/v1/conversations/{id}`` response.","properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"messages":{"items":{"$ref":"#/components/schemas/MessageWire"},"title":"Messages","type":"array"},"title":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Title"}},"required":["id","title","created_at","messages"],"title":"ConversationDetail","type":"object"},"ConversationSummary":{"description":"``GET /api/v1/conversations`` row + ``POST`` 201 body.\n\n``last_message_preview`` is the most recent user / assistant message's\n``content.text``, truncated at the repo layer to 120 chars (with ``…``\nsuffix when cut). Tool-role rows and assistant rows whose ``content.kind``\nis ``system_notice`` are skipped. ``None`` for brand-new conversations\nwith no qualifying messages — see ``chore_chat_last_message_preview``.\n\n``last_message_at`` is the ``created_at`` of that same row, or ``None``\nfor empty conversations. 
The list page uses it to render \"when did\nanyone last touch this thread\" instead of the conversation's\n``created_at``.","properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"last_message_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Last Message At"},"last_message_preview":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Last Message Preview"},"message_count":{"title":"Message Count","type":"integer"},"title":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Title"}},"required":["id","title","created_at","message_count"],"title":"ConversationSummary","type":"object"},"ConversationsListResponse":{"description":"``GET /api/v1/conversations`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/ConversationSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ConversationsListResponse","type":"object"},"CreateClusterRequest":{"description":"Request body for ``POST /api/v1/clusters``.\n\nSee module docstring for the deliberate ``str`` vs ``Literal`` split.","properties":{"auth_kind":{"maxLength":64,"minLength":1,"title":"Auth Kind","type":"string"},"base_url":{"maxLength":512,"minLength":1,"title":"Base Url","type":"string"},"credentials_ref":{"maxLength":128,"minLength":1,"title":"Credentials Ref","type":"string"},"engine_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Engine Config"},"engine_type":{"maxLength":64,"minLength":1,"title":"Engine 
Type","type":"string"},"environment":{"enum":["prod","staging","dev"],"title":"Environment","type":"string"},"name":{"maxLength":128,"minLength":1,"pattern":"^[a-z0-9][a-z0-9-]*$","title":"Name","type":"string"},"notes":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Notes"},"target_filter":{"anyOf":[{"maxLength":256,"minLength":1,"type":"string"},{"type":"null"}],"description":"Optional glob pattern (fnmatch.fnmatchcase: *, ?, [seq], [!seq]; no brace expansion). Scopes GET /clusters/{id}/targets to matching index names. Null = no filter.","title":"Target Filter"}},"required":["name","engine_type","environment","base_url","auth_kind","credentials_ref"],"title":"CreateClusterRequest","type":"object"},"CreateConfigRepoRequest":{"description":"Body of ``POST /api/v1/config-repos`` (FR-3).\n\n``provider`` is server-derived from ``repo_url`` (cycle-2 F4 from\nspec review) — NOT in the payload. The validator enforces a strict\nGitHub URL pattern; non-GitHub URLs surface as 400\n``UNSUPPORTED_PROVIDER`` at the router layer.","properties":{"auth_ref":{"maxLength":128,"minLength":1,"pattern":"^[a-zA-Z0-9_-]+$","title":"Auth Ref","type":"string"},"default_branch":{"default":"main","maxLength":128,"minLength":1,"title":"Default Branch","type":"string"},"name":{"maxLength":128,"minLength":1,"pattern":"^[a-z0-9][a-z0-9-]*$","title":"Name","type":"string"},"pr_base_branch":{"default":"main","maxLength":128,"minLength":1,"title":"Pr Base Branch","type":"string"},"repo_url":{"maxLength":512,"minLength":1,"title":"Repo Url","type":"string"},"webhook_secret_ref":{"anyOf":[{"maxLength":128,"pattern":"^[a-zA-Z0-9_-]+$","type":"string"},{"type":"null"}],"title":"Webhook Secret Ref"}},"required":["name","repo_url","auth_ref"],"title":"CreateConfigRepoRequest","type":"object"},"CreateConversationRequest":{"description":"``POST /api/v1/conversations`` 
body.","properties":{"title":{"anyOf":[{"maxLength":200,"type":"string"},{"type":"null"}],"title":"Title"}},"title":"CreateConversationRequest","type":"object"},"CreateJudgmentListFromUbiRequest":{"description":"Body for ``POST /api/v1/judgments/generate-from-ubi`` (Story 3.2 / FR-3).\n\nMirrors :class:`backend.app.services.agent_judgments_dispatch.UbiJudgmentGenerationRequest`.\nThe ``@model_validator(mode=\"after\")`` enforces the conditional\nrequiredness of ``current_template_id`` + ``rubric`` per the hybrid\nconverter: REQUIRED when ``converter == 'hybrid_ubi_llm'`` (the LLM-\nfill path needs both); FORBIDDEN otherwise (pure UBI never calls\nthe LLM so accepting them silently would mask operator error).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"converter":{"enum":["ctr_threshold","dwell_time","hybrid_ubi_llm"],"title":"Converter","type":"string"},"converter_config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Converter Config"},"current_template_id":{"anyOf":[{"maxLength":36,"minLength":36,"type":"string"},{"type":"null"}],"title":"Current Template Id"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"llm_fill_threshold":{"anyOf":[{"minimum":1.0,"type":"integer"},{"type":"null"}],"default":20,"title":"Llm Fill Threshold"},"mapping_strategy":{"default":"reject","enum":["reject","first_match","most_recent"],"title":"Mapping Strategy","type":"string"},"min_impressions_threshold":{"anyOf":[{"minimum":1.0,"type":"integer"},{"type":"null"}],"default":100,"title":"Min Impressions Threshold"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set 
Id","type":"string"},"rubric":{"anyOf":[{"minLength":1,"type":"string"},{"type":"null"}],"title":"Rubric"},"since":{"format":"date-time","title":"Since","type":"string"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"},"until":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Until"}},"required":["name","query_set_id","cluster_id","target","since","converter"],"title":"CreateJudgmentListFromUbiRequest","type":"object"},"CreateJudgmentListGenerateRequest":{"description":"Body for ``POST /api/v1/judgments/generate`` (Story 3.1).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"current_template_id":{"maxLength":36,"minLength":1,"title":"Current Template Id","type":"string"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set Id","type":"string"},"rubric":{"minLength":1,"title":"Rubric","type":"string"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}},"required":["name","query_set_id","cluster_id","target","current_template_id","rubric"],"title":"CreateJudgmentListGenerateRequest","type":"object"},"CreateProposalRequest":{"description":"Body of ``POST /api/v1/proposals`` (manual proposal creation, FR-4 / AC-6).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"config_diff":{"additionalProperties":true,"title":"Config Diff","type":"object"},"metric_delta":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metric Delta"},"template_id":{"maxLength":36,"minLength":1,"title":"Template Id","type":"string"}},"required":["cluster_id","template_id","config_diff"],"title":"CreateProposalRequest","type":"object"},"CreateQuerySetRequest":{"description":"``POST /api/v1/query-sets`` body.\n\n``cluster_id`` 
is required because Phase 1's shipped schema has\n``query_sets.cluster_id NOT NULL``. Spec FR-3 wording (``cluster_id?``)\nis documented drift tracked at\n``docs/00_overview/planned_features/chore_spec_query_set_cluster_id_drift/idea.md``.","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"}},"required":["name","cluster_id"],"title":"CreateQuerySetRequest","type":"object"},"CreateQueryTemplateRequest":{"description":"Request body for ``POST /api/v1/query-templates``.","properties":{"body":{"minLength":1,"title":"Body","type":"string"},"declared_params":{"additionalProperties":{"type":"string"},"title":"Declared Params","type":"object"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id"}},"required":["name","engine_type","body"],"title":"CreateQueryTemplateRequest","type":"object"},"CreateStudyRequest":{"description":"``POST /api/v1/studies`` body.\n\n``search_space`` is validated post-Pydantic-parse via\n:class:`backend.app.domain.study.search_space.SearchSpace` so\n:exc:`pydantic.ValidationError` produces the spec's 400\n``INVALID_SEARCH_SPACE`` (per Story 3.3 task 2).\n\nfeat_digest_executable_followups Story 4.2 — optional ``parent`` field\nrecords the parent proposal + followup-index lineage when the study\nwas spawned from a digest \"Run this followup\" action (FR-11).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"config":{"$ref":"#/components/schemas/StudyConfigSpec"},"judgment_list_id":{"maxLength":36,"minLength":1,"title":"Judgment List 
Id","type":"string"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"objective":{"$ref":"#/components/schemas/ObjectiveSpec"},"parent":{"anyOf":[{"$ref":"#/components/schemas/ParentFollowupRef"},{"type":"null"}]},"parent_study_id":{"anyOf":[{"maxLength":36,"minLength":36,"type":"string"},{"type":"null"}],"description":"feat_study_clone_from_previous FR-7 — when the operator clones an existing study via the study-detail Clone button, this carries the source study's id. Server validates existence (404 PARENT_STUDY_NOT_FOUND) and same-cluster (422 PARENT_STUDY_WRONG_CLUSTER) before persisting to studies.parent_study_id. Independent of the proposal-lineage 'parent' field (D-5); both may be set.","title":"Parent Study Id"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set Id","type":"string"},"search_space":{"additionalProperties":true,"title":"Search Space","type":"object"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"},"template_id":{"maxLength":36,"minLength":1,"title":"Template Id","type":"string"}},"required":["name","cluster_id","target","template_id","query_set_id","judgment_list_id","search_space","objective","config"],"title":"CreateStudyRequest","type":"object"},"CurvePoint":{"description":"One point on the best-so-far curve.\n\n``trial_number`` is the trial's ``optuna_trial_number`` (the canonical\n\"trial order within the study\" field — see ``auto_followup.py`` module\ndocstring for why we sort by this rather than ``started_at``).\n``best_so_far`` is the running extremum of ``primary_metric`` over all\nearlier trials, sign-corrected to the study's optimization direction.","properties":{"best_so_far":{"title":"Best So Far","type":"number"},"trial_number":{"title":"Trial Number","type":"integer"}},"required":["trial_number","best_so_far"],"title":"CurvePoint","type":"object"},"DigestResponse":{"description":"Body of ``GET /api/v1/studies/{id}/digest`` (FR-3 / 
AC-3).\n\nfeat_digest_executable_followups Story 4.1 — ``suggested_followups`` is\nnow a discriminated-union list (NarrowFollowup | WidenFollowup |\nTextFollowup), populated by the digest handler via\n``parse_followup_list(digest.suggested_followups, ...)`` so legacy or\nmalformed JSONB payloads never crash the response.","properties":{"generated_at":{"format":"date-time","title":"Generated At","type":"string"},"generated_by":{"title":"Generated By","type":"string"},"id":{"title":"Id","type":"string"},"narrative":{"title":"Narrative","type":"string"},"parameter_importance":{"additionalProperties":{"type":"number"},"title":"Parameter Importance","type":"object"},"recommended_config":{"additionalProperties":true,"title":"Recommended Config","type":"object"},"study_id":{"title":"Study Id","type":"string"},"suggested_followups":{"items":{"$ref":"#/components/schemas/FollowupItem"},"title":"Suggested Followups","type":"array"}},"required":["id","study_id","narrative","parameter_importance","recommended_config","suggested_followups","generated_by","generated_at"],"title":"DigestResponse","type":"object"},"Document":{"description":"A single document by ID — return shape of ``SearchAdapter.get_document``.\n\nMirrors :class:`ScoredHit` minus ``score`` (browsing doesn't need scoring).\n``source`` is ``None`` when the engine's index has ``_source: false`` mapping.","properties":{"doc_id":{"minLength":1,"title":"Doc Id","type":"string"},"source":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Source"}},"required":["doc_id"],"title":"Document","type":"object"},"DocumentListResponse":{"description":"``GET /api/v1/clusters/{cluster_id}/targets/{target}/documents`` response.\n\n``next_cursor`` opaque-encodes the ES ``hits[-1].sort`` array of the\nlast visible row when ``has_more`` is True (see\n``backend.app.api.v1._documents_cursor``). 
The ``X-Total-Count`` header\non the response carries the engine's ``hits.total.value``.","properties":{"data":{"items":{"$ref":"#/components/schemas/DocumentSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"DocumentListResponse","type":"object"},"DocumentSummary":{"description":"One row in the documents list (per FR-3 / FR-8).\n\n``source`` is the *truncated* preview emitted by\n``backend.app.services.documents.truncate_source_for_list``. The detail\nendpoint returns the untruncated ``Document.source``.","properties":{"doc_id":{"minLength":1,"title":"Doc Id","type":"string"},"source":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Source"}},"required":["doc_id","source"],"title":"DocumentSummary","type":"object"},"FieldSpec":{"description":"One field returned by ``get_schema``.","properties":{"analyzer":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Analyzer"},"doc_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Doc Count"},"name":{"title":"Name","type":"string"},"type":{"title":"Type","type":"string"}},"required":["name","type"],"title":"FieldSpec","type":"object"},"FloatParam":{"additionalProperties":false,"description":"Continuous float parameter.\n\n``log=True`` enables log-uniform sampling\n(Optuna's ``suggest_float(..., log=True)``); requires ``low > 
0``.","properties":{"high":{"title":"High","type":"number"},"log":{"default":false,"title":"Log","type":"boolean"},"low":{"title":"Low","type":"number"},"type":{"const":"float","title":"Type","type":"string"}},"required":["type","low","high"],"title":"FloatParam","type":"object"},"FollowupItem":{"discriminator":{"mapping":{"narrow":"#/components/schemas/NarrowFollowup","swap_template":"#/components/schemas/SwapTemplateFollowup","text":"#/components/schemas/TextFollowup","widen":"#/components/schemas/WidenFollowup"},"propertyName":"kind"},"oneOf":[{"$ref":"#/components/schemas/NarrowFollowup"},{"$ref":"#/components/schemas/WidenFollowup"},{"$ref":"#/components/schemas/TextFollowup"},{"$ref":"#/components/schemas/SwapTemplateFollowup"}]},"GenerateJudgmentsResponse":{"description":"Response of ``POST /api/v1/judgments/generate``.\n\nPer GPT-5.5 cycle 1 F5 — the endpoint registers a typed\n``response_model`` so OpenAPI introspection + contract tests can verify\nthe wire shape.","properties":{"judgment_list_id":{"title":"Judgment List Id","type":"string"},"status":{"const":"generating","title":"Status","type":"string"}},"required":["judgment_list_id","status"],"title":"GenerateJudgmentsResponse","type":"object"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"title":"Detail","type":"array"}},"title":"HTTPValidationError","type":"object"},"HeadlineShape":{"description":"Top-line metric value + N(queries) used in the CI.\n\n``metric`` uses ``str`` (not ``ObjectiveMetric``) to avoid a circular\nimport: ``schemas.py`` imports ``ConfidenceShape`` from here, so this\nmodule cannot import back from ``schemas.py``. 
The upstream value is\nalready validated by the existing ``ObjectiveMetric`` Literal at the\ncreate-study endpoint (``schemas.py:214``).","properties":{"k":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"K"},"metric":{"title":"Metric","type":"string"},"n_queries":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"N Queries"},"value":{"title":"Value","type":"number"}},"required":["metric","value","k","n_queries"],"title":"HeadlineShape","type":"object"},"HealthCheckResult":{"description":"Wire shape of the per-cluster health probe (mirrors ``HealthStatus``).","properties":{"checked_at":{"title":"Checked At","type":"string"},"error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error"},"status":{"enum":["green","yellow","red","unreachable"],"title":"Status","type":"string"},"version":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Version"}},"required":["status","checked_at"],"title":"HealthCheckResult","type":"object"},"HealthResponse":{"description":"The /healthz response body. 
Same shape for HTTP 200 and 503.","properties":{"openai_capabilities":{"$ref":"#/components/schemas/OpenAICapabilities"},"openai_endpoint":{"description":"Configured OPENAI_BASE_URL","title":"Openai Endpoint","type":"string"},"status":{"enum":["ok","degraded"],"title":"Status","type":"string"},"subsystems":{"$ref":"#/components/schemas/Subsystems"},"uptime_seconds":{"description":"Seconds since the API process started","title":"Uptime Seconds","type":"integer"},"version":{"description":"Application version (relyloop_git_sha)","title":"Version","type":"string"}},"required":["status","subsystems","openai_endpoint","openai_capabilities","version","uptime_seconds"],"title":"HealthResponse","type":"object"},"ImportJudgmentItem":{"description":"One row in :class:`ImportJudgmentListRequest`.","properties":{"doc_id":{"maxLength":512,"minLength":1,"title":"Doc Id","type":"string"},"notes":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Notes"},"query_id":{"maxLength":36,"minLength":1,"title":"Query Id","type":"string"},"rating":{"enum":[0,1,2,3],"title":"Rating","type":"integer"}},"required":["query_id","doc_id","rating"],"title":"ImportJudgmentItem","type":"object"},"ImportJudgmentListRequest":{"description":"Body for ``POST /api/v1/judgment-lists/import`` (Story 3.2).","properties":{"cluster_id":{"maxLength":36,"minLength":1,"title":"Cluster Id","type":"string"},"description":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Description"},"judgments":{"items":{"$ref":"#/components/schemas/ImportJudgmentItem"},"maxItems":100000,"minItems":1,"title":"Judgments","type":"array"},"name":{"maxLength":256,"minLength":1,"title":"Name","type":"string"},"query_set_id":{"maxLength":36,"minLength":1,"title":"Query Set 
Id","type":"string"},"rubric":{"minLength":1,"title":"Rubric","type":"string"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}},"required":["name","query_set_id","cluster_id","target","rubric","judgments"],"title":"ImportJudgmentListRequest","type":"object"},"IntParam":{"additionalProperties":false,"description":"Integer parameter inclusive of both bounds.","properties":{"high":{"title":"High","type":"integer"},"low":{"title":"Low","type":"integer"},"type":{"const":"int","title":"Type","type":"string"}},"required":["type","low","high"],"title":"IntParam","type":"object"},"JudgmentListDetail":{"description":"``GET /api/v1/judgment-lists/{id}`` response.\n\nNote: ``generation_params`` is populated for UBI lists (feat_ubi_judgments\nStory 1.1's JSONB column) and NULL for LLM lists. The Story 4.3 UI\n(```` + ````) reads the\npayload to discriminate UBI/hybrid lists and to reconstruct the\noriginal request for the ambiguous-skip \"Re-run with most_recent\"\naffordance.","properties":{"calibration":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Calibration"},"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"current_template_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Current Template Id"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"generation_params":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Generation Params"},"id":{"title":"Id","type":"string"},"judgment_count":{"title":"Judgment Count","type":"integer"},"name":{"title":"Name","type":"string"},"query_set_id":{"title":"Query Set 
Id","type":"string"},"rubric":{"title":"Rubric","type":"string"},"source_breakdown":{"$ref":"#/components/schemas/_SourceBreakdown"},"status":{"enum":["generating","complete","failed"],"title":"Status","type":"string"},"target":{"title":"Target","type":"string"}},"required":["id","name","description","query_set_id","cluster_id","target","current_template_id","rubric","status","failed_reason","judgment_count","source_breakdown","calibration","generation_params","created_at"],"title":"JudgmentListDetail","type":"object"},"JudgmentListJudgmentsResponse":{"description":"``GET /api/v1/judgment-lists/{id}/judgments`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/JudgmentRow"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"JudgmentListJudgmentsResponse","type":"object"},"JudgmentListListResponse":{"description":"``GET /api/v1/judgment-lists`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/JudgmentListSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"JudgmentListListResponse","type":"object"},"JudgmentListRef":{"description":"One entry in the ``QUERY_HAS_JUDGMENTS`` 409 envelope.\n\nLives in ``detail.judgment_lists``. 
Maps from the repo-layer\n:class:`backend.app.db.repo.judgment.JudgmentListRefRow` at the\nrouter boundary.","properties":{"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"}},"required":["id","name"],"title":"JudgmentListRef","type":"object"},"JudgmentListSummary":{"description":"List-view row on ``GET /api/v1/judgment-lists``.","properties":{"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"query_set_id":{"title":"Query Set Id","type":"string"},"status":{"enum":["generating","complete","failed"],"title":"Status","type":"string"},"target":{"title":"Target","type":"string"}},"required":["id","name","description","query_set_id","cluster_id","target","status","created_at"],"title":"JudgmentListSummary","type":"object"},"JudgmentRow":{"description":"``GET /api/v1/judgment-lists/{id}/judgments`` row + PATCH response.","properties":{"confidence":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Confidence"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"doc_id":{"title":"Doc Id","type":"string"},"id":{"title":"Id","type":"string"},"judgment_list_id":{"title":"Judgment List Id","type":"string"},"notes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Notes"},"query_id":{"title":"Query Id","type":"string"},"rater_ref":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rater Ref"},"rating":{"enum":[0,1,2,3],"title":"Rating","type":"integer"},"source":{"enum":["llm","human","click"],"title":"Source","type":"string"}},"required":["id","judgment_list_id","query_id","doc_id","rating","source","rater_ref","confidence","notes","created_at"],"title":"JudgmentRow","type":"object"},"LateTrialStddevShape":{"description":"Sample stddev of ``primary_metric`` over the late-trial 
window.","properties":{"min_window_required":{"title":"Min Window Required","type":"integer"},"value":{"title":"Value","type":"number"},"window_size":{"title":"Window Size","type":"integer"}},"required":["value","window_size","min_window_required"],"title":"LateTrialStddevShape","type":"object"},"MessageWire":{"description":"One row of ``GET /api/v1/conversations/{id}.messages``.","properties":{"content":{"additionalProperties":true,"title":"Content","type":"object"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"role":{"enum":["user","assistant","tool"],"title":"Role","type":"string"},"tool_calls":{"anyOf":[{"items":{"additionalProperties":true,"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"required":["id","role","content","created_at"],"title":"MessageWire","type":"object"},"NarrowFollowup":{"additionalProperties":false,"description":"A 'narrow' followup — re-run with a tighter range than the parent.","properties":{"kind":{"const":"narrow","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"$ref":"#/components/schemas/SearchSpace"}},"required":["kind","rationale","search_space"],"title":"NarrowFollowup","type":"object"},"ObjectiveSpec":{"description":"Wire shape of ``studies.objective`` (write-side validated at create).\n\n``k`` is required for ``ndcg`` / ``precision`` / ``recall`` (per\nstandard IR-evaluation conventions: those metrics are computed at a\ncutoff rank). ``map`` accepts ``k`` optionally; ``mrr`` / ``err`` ignore\nit. 
The model_validator enforces this so a malformed objective\nsurfaces as 400 ``INVALID_SEARCH_SPACE`` / 422 ``VALIDATION_ERROR``\nat study-create time rather than failing later inside ``run_trial``\nwhen the worker computes the metric.","properties":{"direction":{"default":"maximize","enum":["maximize","minimize"],"title":"Direction","type":"string"},"k":{"anyOf":[{"enum":[1,3,5,10,20,50,100],"type":"integer"},{"type":"null"}],"title":"K"},"metric":{"enum":["ndcg","map","precision","recall","mrr"],"title":"Metric","type":"string"}},"required":["metric"],"title":"ObjectiveSpec","type":"object"},"OpenAICapabilities":{"description":"Cached results of the OpenAI capability check (Story 3.3 populates Redis).\n\nStep 1 (``models_endpoint``) is reported first because it gates the rest:\nwhen it fails, the other three are reported as ``\"untested\"``. The\n``models_endpoint_status_code`` field is required-but-nullable\n(per ``bug_openai_capability_check_incapable_on_valid_key`` spec §19 D-3/D-8)\n— always present in the JSON, ``null`` when not applicable. This lets\noperators distinguish ``401 -> bad key``, ``429 -> quota``,\n``5xx -> upstream outage``, ``null -> network unreachable / cache miss``.","properties":{"chat":{"description":"Chat completion probe result","enum":["ok","fail","untested"],"title":"Chat","type":"string"},"function_calling":{"description":"Function-calling probe result (tool_choice=required)","enum":["ok","fail","untested"],"title":"Function Calling","type":"string"},"models_endpoint":{"description":"GET /models probe outcome. 'ok' / 'fail' are projected from CapabilityResult.models_endpoint; 'untested' is the cache-miss default, matching the existing chat / function_calling / structured_output cache-miss handling.","enum":["ok","fail","untested"],"title":"Models Endpoint","type":"string"},"models_endpoint_status_code":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"HTTP status code from the GET /models probe when it HTTP-failed (>= 400). 
null for the success path, network-class failure (timeout / DNS / connection-refused), or cache miss. Required-but-nullable: the JSON key is always present with explicit null when no value, never omitted.","title":"Models Endpoint Status Code"},"structured_output":{"description":"JSON-schema response_format probe result","enum":["ok","fail","untested"],"title":"Structured Output","type":"string"}},"required":["models_endpoint","models_endpoint_status_code","chat","function_calling","structured_output"],"title":"OpenAICapabilities","type":"object"},"OpenPrResponse":{"description":"Body of ``POST /api/v1/proposals/{id}/open_pr`` (FR-1).\n\nReturned with HTTP 202 on successful enqueue. Status is always\n``'pending'`` at enqueue time; the worker flips it to ``'pr_opened'``\nafter the PR is open.","properties":{"message":{"title":"Message","type":"string"},"proposal_id":{"title":"Proposal Id","type":"string"},"status":{"const":"pending","title":"Status","type":"string"}},"required":["proposal_id","status","message"],"title":"OpenPrResponse","type":"object"},"OverrideJudgmentRequest":{"description":"Body for ``PATCH /api/v1/judgment-lists/{id}/judgments/{judgment_id}``.\n\n``rating`` is INTENTIONALLY unbounded at the Pydantic layer — spec §8.5\nrequires out-of-range failures to surface as 400 ``INVALID_RATING`` (not\nPydantic's default 422 ``VALIDATION_ERROR``). 
The handler validates the\nvalue manually and raises the domain code (per GPT-5.5 cycle 1 F4).","properties":{"notes":{"anyOf":[{"maxLength":2000,"type":"string"},{"type":"null"}],"title":"Notes"},"rating":{"title":"Rating","type":"integer"}},"required":["rating"],"title":"OverrideJudgmentRequest","type":"object"},"ParentFollowupRef":{"description":"Optional lineage payload on ``POST /api/v1/studies``.\n\nfeat_digest_executable_followups FR-11 — when the operator clicks\n\"Run this followup\" on a proposal's digest card, the create-study\npayload carries the parent proposal's id + the 0-based index into\nthe digest's ``suggested_followups`` array so the spawned study\nremembers where it came from.\n\n``proposal_id`` is a UUIDv7 (36-char hex). The exact-length bound\nforces malformed strings to surface as 422 ``VALIDATION_ERROR``\nrather than reach the DB FK check and emerge as a 404\n``PROPOSAL_NOT_FOUND``.","properties":{"followup_index":{"minimum":0.0,"title":"Followup Index","type":"integer"},"proposal_id":{"maxLength":36,"minLength":36,"title":"Proposal Id","type":"string"}},"required":["proposal_id","followup_index"],"title":"ParentFollowupRef","type":"object"},"PerQueryOutcomesShape":{"description":"Per-query outcome counts + the top-5 named regressors and improvers.","properties":{"comparison_against":{"enum":["runner_up","baseline"],"title":"Comparison Against","type":"string"},"improved":{"title":"Improved","type":"integer"},"regressed":{"title":"Regressed","type":"integer"},"top_improvers":{"default":[],"items":{"$ref":"#/components/schemas/RegressorRowShape"},"title":"Top Improvers","type":"array"},"top_regressors":{"items":{"$ref":"#/components/schemas/RegressorRowShape"},"title":"Top Regressors","type":"array"},"unchanged":{"title":"Unchanged","type":"integer"}},"required":["improved","unchanged","regressed","comparison_against","top_regressors"],"title":"PerQueryOutcomesShape","type":"object"},"ProposalDetail":{"description":"Body of the proposal 
detail endpoints.\n\nUsed by ``GET /api/v1/proposals/{id}``, ``POST /api/v1/proposals``,\nand ``POST /api/v1/proposals/{id}/reject``.","properties":{"cluster":{"$ref":"#/components/schemas/_ClusterEmbed"},"config_diff":{"additionalProperties":true,"title":"Config Diff","type":"object"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"digest":{"anyOf":[{"$ref":"#/components/schemas/_DigestEmbed"},{"type":"null"}]},"id":{"title":"Id","type":"string"},"is_currently_live":{"default":false,"title":"Is Currently Live","type":"boolean"},"metric_delta":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metric Delta"},"pr_merged_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Pr Merged At"},"pr_open_error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pr Open Error"},"pr_state":{"anyOf":[{"enum":["open","closed","merged"],"type":"string"},{"type":"null"}],"title":"Pr State"},"pr_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pr Url"},"rejected_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rejected Reason"},"status":{"enum":["pending","pr_opened","pr_merged","rejected"],"title":"Status","type":"string"},"study_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Study Id"},"study_summary":{"anyOf":[{"$ref":"#/components/schemas/_StudySummary"},{"type":"null"}]},"study_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Study Trial Id"},"template":{"$ref":"#/components/schemas/_TemplateEmbed"}},"required":["id","study_id","study_summary","study_trial_id","cluster","template","config_diff","metric_delta","status","pr_url","pr_state","pr_merged_at","pr_open_error","rejected_reason","digest","created_at"],"title":"ProposalDetail","type":"object"},"ProposalSummary":{"description":"Row in the ``GET /api/v1/proposals`` list 
response.","properties":{"cluster":{"$ref":"#/components/schemas/_ClusterEmbed"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"is_currently_live":{"default":false,"title":"Is Currently Live","type":"boolean"},"metric_delta":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metric Delta"},"pr_state":{"anyOf":[{"enum":["open","closed","merged"],"type":"string"},{"type":"null"}],"title":"Pr State"},"pr_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pr Url"},"status":{"enum":["pending","pr_opened","pr_merged","rejected"],"title":"Status","type":"string"},"study_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Study Id"},"template":{"$ref":"#/components/schemas/_TemplateEmbed"}},"required":["id","study_id","cluster","template","status","pr_state","pr_url","metric_delta","created_at"],"title":"ProposalSummary","type":"object"},"ProposalsListResponse":{"description":"Body of ``GET /api/v1/proposals``.","properties":{"data":{"items":{"$ref":"#/components/schemas/ProposalSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"ProposalsListResponse","type":"object"},"QueryHasJudgmentsDetail":{"description":"The ``detail`` object of a 409 ``QUERY_HAS_JUDGMENTS`` response.\n\nExtends the canonical ``{error_code, message, retryable}`` envelope\nwith two structured fields the frontend consumes directly\n(``judgment_lists`` + ``overflow_count``). 
Wired into the FastAPI\nroute's ``responses={409: {\"model\": QueryHasJudgmentsEnvelope}}`` so\nthe OpenAPI schema documents the contract.","properties":{"error_code":{"const":"QUERY_HAS_JUDGMENTS","title":"Error Code","type":"string"},"judgment_lists":{"items":{"$ref":"#/components/schemas/JudgmentListRef"},"title":"Judgment Lists","type":"array"},"message":{"title":"Message","type":"string"},"overflow_count":{"title":"Overflow Count","type":"integer"},"retryable":{"const":false,"title":"Retryable","type":"boolean"}},"required":["error_code","message","retryable","judgment_lists","overflow_count"],"title":"QueryHasJudgmentsDetail","type":"object"},"QueryHasJudgmentsEnvelope":{"description":"Top-level 409 wrapper (FastAPI nests under ``detail`` for HTTPException).","properties":{"detail":{"$ref":"#/components/schemas/QueryHasJudgmentsDetail"}},"required":["detail"],"title":"QueryHasJudgmentsEnvelope","type":"object"},"QueryListResponse":{"description":"``GET /api/v1/query-sets/{set_id}/queries`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/QueryRow"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"QueryListResponse","type":"object"},"QueryRow":{"description":"Wire row returned by the per-query GET + PATCH endpoints.\n\nUsed by both ``GET /api/v1/query-sets/{set_id}/queries`` and\n``PATCH /api/v1/query-sets/{set_id}/queries/{query_id}``.\n``judgment_count`` is a derived field — single batched GROUP BY in the\nrouter via :func:`backend.app.db.repo.judgment.count_judgments_per_query`.","properties":{"id":{"title":"Id","type":"string"},"judgment_count":{"title":"Judgment Count","type":"integer"},"query_metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Query Metadata"},"query_text":{"title":"Query 
Text","type":"string"},"reference_answer":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reference Answer"}},"required":["id","query_text","reference_answer","query_metadata","judgment_count"],"title":"QueryRow","type":"object"},"QuerySetDetail":{"description":"``GET /api/v1/query-sets/{id}`` response.","properties":{"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"query_count":{"title":"Query Count","type":"integer"}},"required":["id","name","description","cluster_id","query_count","created_at"],"title":"QuerySetDetail","type":"object"},"QuerySetListResponse":{"description":"``GET /api/v1/query-sets`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/QuerySetSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"QuerySetListResponse","type":"object"},"QuerySetSummary":{"description":"List-view shape.\n\n``query_count`` is the number of queries in the set. It is resolved\nvia a single batched ``GROUP BY query_set_id`` aggregate per page\n(``repo.count_queries_for_sets``), NOT a per-row count — so the\nlist endpoint stays at a fixed 2 queries (the page + the count\naggregate) regardless of page size. 
This is the same no-N+1 pattern\n``feat_studies_convergence_visibility`` (PR #421) used for the\nstudies-list ``trial_count`` field.","properties":{"cluster_id":{"title":"Cluster Id","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"query_count":{"title":"Query Count","type":"integer"}},"required":["id","name","cluster_id","query_count","created_at"],"title":"QuerySetSummary","type":"object"},"QueryTemplateDetail":{"description":"``GET /api/v1/query-templates/{id}`` response.","properties":{"body":{"title":"Body","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"declared_params":{"additionalProperties":{"type":"string"},"title":"Declared Params","type":"object"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id"},"version":{"title":"Version","type":"integer"}},"required":["id","name","engine_type","body","declared_params","version","parent_id","created_at"],"title":"QueryTemplateDetail","type":"object"},"QueryTemplateListResponse":{"description":"``GET /api/v1/query-templates`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/QueryTemplateSummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"QueryTemplateListResponse","type":"object"},"QueryTemplateSummary":{"description":"List-view shape; drops ``body`` + the full ``declared_params`` dict.\n\nSurfaces ``param_count`` (= ``len(declared_params)``) so the\ntemplates list can show each template's tuning surface at a glance.\n``param_count`` is free to compute — 
``declared_params`` is a JSONB\ncolumn already loaded on the row (not a child relationship), so the\ncount is ``len(row.declared_params)`` with no extra query and no\nN+1 risk. The full dict remains on ``QueryTemplateDetail``.","properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"engine_type":{"enum":["elasticsearch","opensearch","solr"],"title":"Engine Type","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"param_count":{"title":"Param Count","type":"integer"},"version":{"title":"Version","type":"integer"}},"required":["id","name","engine_type","version","param_count","created_at"],"title":"QueryTemplateSummary","type":"object"},"RegressorRowShape":{"description":"One row in the named-regressors or named-improvers table.\n\nUsed for BOTH the ``top_regressors`` and ``top_improvers`` lists.\nThe wire shape is identical — ``delta = winner_score - comparison_score``\nis negative on the regressor list, positive on the improver list. 
The\nclass name is historical (regressors shipped first); reusing the same\ntype keeps the schema and the per-row renderer compact.","properties":{"comparison_score":{"title":"Comparison Score","type":"number"},"delta":{"title":"Delta","type":"number"},"query_id":{"title":"Query Id","type":"string"},"query_text":{"title":"Query Text","type":"string"},"winner_score":{"title":"Winner Score","type":"number"}},"required":["query_id","query_text","winner_score","comparison_score","delta"],"title":"RegressorRowShape","type":"object"},"RejectProposalRequest":{"description":"Body of ``POST /api/v1/proposals/{id}/reject`` (FR-4 / AC-5).","properties":{"reason":{"anyOf":[{"maxLength":500,"type":"string"},{"type":"null"}],"title":"Reason"}},"title":"RejectProposalRequest","type":"object"},"ReseedStatusResponse":{"additionalProperties":false,"description":"Polling-endpoint response for ``GET /api/v1/_test/demo/reseed/status``.\n\nPer ``bug_demo_reseed_fake_metric_regression`` D-2. Lives in Redis as a\nsingle JSON blob keyed by :data:`DEMO_RESEED_STATUS_KEY` so the\nhandler reads it in one round-trip.","properties":{"current_step":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Current Step"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"scenarios_completed":{"default":0,"title":"Scenarios Completed","type":"integer"},"scenarios_skipped":{"items":{"type":"string"},"title":"Scenarios Skipped","type":"array"},"scenarios_total":{"default":0,"title":"Scenarios Total","type":"integer"},"started_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Started 
At"},"status":{"enum":["idle","running","complete","failed"],"title":"Status","type":"string"},"steps":{"items":{"type":"string"},"title":"Steps","type":"array"},"summary":{"anyOf":[{"$ref":"#/components/schemas/ReseedSummary"},{"type":"null"}]}},"required":["status"],"title":"ReseedStatusResponse","type":"object"},"ReseedSummary":{"additionalProperties":false,"description":"Returned by :func:`reseed_demo_state` on success.\n\nPer spec §9 Required invariants, every counter is exactly 4 on the\nhappy path; ``duration_ms`` is wall-clock from orchestration start\nto the rename commit.","properties":{"clusters_created":{"title":"Clusters Created","type":"integer"},"duration_ms":{"title":"Duration Ms","type":"integer"},"proposals_created":{"title":"Proposals Created","type":"integer"},"query_sets_created":{"title":"Query Sets Created","type":"integer"},"studies_completed":{"title":"Studies Completed","type":"integer"}},"required":["clusters_created","query_sets_created","studies_completed","proposals_created","duration_ms"],"title":"ReseedSummary","type":"object"},"RunQueryHit":{"description":"One hit in the ``run_query`` response.","properties":{"doc_id":{"title":"Doc Id","type":"string"},"score":{"title":"Score","type":"number"},"source":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Source"}},"required":["doc_id","score"],"title":"RunQueryHit","type":"object"},"RunQueryRequest":{"description":"``POST /api/v1/clusters/{id}/run_query`` body.","properties":{"query_dsl":{"additionalProperties":true,"title":"Query Dsl","type":"object"},"target":{"maxLength":256,"minLength":1,"title":"Target","type":"string"},"top_k":{"default":10,"maximum":1000.0,"minimum":1.0,"title":"Top K","type":"integer"}},"required":["target","query_dsl"],"title":"RunQueryRequest","type":"object"},"RunQueryResponse":{"description":"``POST /api/v1/clusters/{id}/run_query`` 
response.","properties":{"hits":{"items":{"$ref":"#/components/schemas/RunQueryHit"},"title":"Hits","type":"array"}},"required":["hits"],"title":"RunQueryResponse","type":"object"},"RunnerUpGapShape":{"description":"Runner-up trial's metric vs the winner.\n\nThe whole shape is suppressed to ``None`` when there are <2 complete\ntrials (FR-2 + FR-7); ``classification`` is non-null whenever this shape\nis present.","properties":{"classification":{"enum":["robust_plateau","sharp_peak"],"title":"Classification","type":"string"},"runner_up_metric":{"title":"Runner Up Metric","type":"number"},"top10_within":{"title":"Top10 Within","type":"number"},"value":{"title":"Value","type":"number"}},"required":["value","classification","top10_within","runner_up_metric"],"title":"RunnerUpGapShape","type":"object"},"Schema":{"description":"An index / collection's field schema.","properties":{"fields":{"items":{"$ref":"#/components/schemas/FieldSpec"},"title":"Fields","type":"array"},"name":{"title":"Name","type":"string"}},"required":["name","fields"],"title":"Schema","type":"object"},"SearchSpace":{"additionalProperties":false,"description":"Pydantic model for the ``studies.search_space`` JSONB column.\n\nWire format::\n\n {\n \"params\": {\n \"boost_title\": {\"type\": \"float\", \"low\": 0.1, \"high\": 10.0, \"log\": true},\n \"min_should_match\": {\"type\": \"int\", \"low\": 1, \"high\": 5},\n \"operator\": {\"type\": \"categorical\", \"choices\": [\"and\", \"or\"]},\n }\n 
}","properties":{"params":{"additionalProperties":{"discriminator":{"mapping":{"categorical":"#/components/schemas/CategoricalParam","float":"#/components/schemas/FloatParam","int":"#/components/schemas/IntParam"},"propertyName":"type"},"oneOf":[{"$ref":"#/components/schemas/FloatParam"},{"$ref":"#/components/schemas/IntParam"},{"$ref":"#/components/schemas/CategoricalParam"}]},"minProperties":1,"title":"Params","type":"object"}},"required":["params"],"title":"SearchSpace","type":"object"},"SeedAutoFollowupChainRequest":{"additionalProperties":false,"description":"Payload for ``POST /api/v1/_test/auto-followup/seed-chain``.\n\nSeeds ``depth + 1`` linked studies (root → … → leaf) so E2E tests can\ncover the chain-panel parent-link / children-table / cascade-radio paths\nthat the public ``POST /api/v1/studies`` endpoint can't drive\n(``parent_study_id`` is set only by the auto-followup worker).\n\nCloses ``chore_auto_followup_e2e_chain_seed_helper`` (idea #2).","properties":{"cluster_id":{"minLength":1,"title":"Cluster Id","type":"string"},"depth":{"description":"Number of chain hops to seed. depth=1 → root + leaf (2 nodes). depth=2 → root + 1 middle + leaf (3 nodes).","maximum":5.0,"minimum":1.0,"title":"Depth","type":"integer"},"in_flight_leaf":{"default":true,"description":"When True (default), the deepest node is left at status='queued'. When False, it's driven to 'completed' too. Default True matches the primary E2E use case: cascade-radio coverage where the middle node needs an in-flight child.","title":"In Flight Leaf","type":"boolean"},"in_flight_middle":{"default":true,"description":"When True (default), the immediate parent of the leaf is left at status='queued' so the Cancel button is enabled (canCancel = running || queued per study-action-bar.tsx:46). Required for the cancel-modal cascade-radio test. 
When False, all intermediates are completed (more realistic chain state but cancel modal won't open on the middle).","title":"In Flight Middle","type":"boolean"},"judgment_list_id":{"minLength":1,"title":"Judgment List Id","type":"string"},"query_set_id":{"minLength":1,"title":"Query Set Id","type":"string"},"template_id":{"minLength":1,"title":"Template Id","type":"string"}},"required":["cluster_id","query_set_id","template_id","judgment_list_id","depth"],"title":"SeedAutoFollowupChainRequest","type":"object"},"SeedAutoFollowupChainResponse":{"description":"IDs of every node in the seeded chain, in parent→child order.","properties":{"leaf_id":{"title":"Leaf Id","type":"string"},"middle_ids":{"items":{"type":"string"},"title":"Middle Ids","type":"array"},"root_id":{"title":"Root Id","type":"string"}},"required":["root_id","middle_ids","leaf_id"],"title":"SeedAutoFollowupChainResponse","type":"object"},"SeedCompletedStudyRequest":{"additionalProperties":false,"description":"Payload for ``POST /api/v1/_test/studies/seed-completed``.\n\nAll four FK fields are required; the caller is responsible for\nseeding the parent rows first (typically via the public\n``seedFullChain`` E2E helper).","properties":{"cluster_id":{"minLength":1,"title":"Cluster Id","type":"string"},"extra_trial_metrics":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"description":"Optional list of additional complete-trial `primary_metric` values (numbered from 2 upward) seeded on top of the default winner (0.487) + runner-up (0.412). Used to push the study past the convergence classifier's usable-trial floor (5) so the `` renders a real verdict + curve instead of the too_few_trials null state (feat_study_convergence_indicator). Every value MUST be < 0.487 so the winner / best_metric / proposal / digest stay anchored to the unchanged 0.412 -> 0.487 story. 
Omit for the default 2-trial shape.","title":"Extra Trial Metrics"},"judgment_list_id":{"minLength":1,"title":"Judgment List Id","type":"string"},"query_set_id":{"minLength":1,"title":"Query Set Id","type":"string"},"runner_up_per_query":{"anyOf":[{"additionalProperties":{"additionalProperties":true,"type":"object"},"type":"object"},{"type":"null"}],"description":"Optional per-query metrics for the runner-up trial; pairs with `winner_per_query`.","title":"Runner Up Per Query"},"suggested_followups":{"anyOf":[{"items":{"additionalProperties":true,"type":"object"},"type":"array"},{"type":"null"}],"description":"feat_digest_executable_followups Story 6.1 — optional structured FollowupItem list (`[{kind, rationale, search_space}]`) to seed on the digest. When omitted, the seeder writes two default text-kind items. The E2E Run-followup spec passes a `narrow` item so it can drive the per-card Run button + modal prefill flow.","title":"Suggested Followups"},"template_id":{"minLength":1,"title":"Template Id","type":"string"},"winner_per_query":{"anyOf":[{"additionalProperties":{"additionalProperties":true,"type":"object"},"type":"object"},{"type":"null"}],"description":"Optional per-query metrics dict to populate on the winner trial. Shape: `{query_id: {metric_token: float}}` where metric_token matches what `scoring.score()` emits (e.g. `ndcg@10`). Set alongside `runner_up_per_query` to drive the ConfidencePanel happy path on `/studies/[id]`. When omitted, the seeded trials have `per_query_metrics IS NULL` (the pre-feat_pr_metric_confidence shape).","title":"Winner Per Query"},"with_pending_proposal":{"default":true,"description":"When true (default), also insert a `status='pending'` proposal linked to the study so the digest panel's Open PR button renders enabled. 
Set false to test the AC-11 aria-disabled-button + tooltip path.","title":"With Pending Proposal","type":"boolean"}},"required":["cluster_id","query_set_id","template_id","judgment_list_id"],"title":"SeedCompletedStudyRequest","type":"object"},"SeedCompletedStudyResponse":{"description":"IDs of the inserted rows; mirrors :class:`SeededStudyTriple`.","properties":{"digest_id":{"title":"Digest Id","type":"string"},"proposal_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Proposal Id"},"study_id":{"title":"Study Id","type":"string"}},"required":["study_id","digest_id","proposal_id"],"title":"SeedCompletedStudyResponse","type":"object"},"SendMessageRequest":{"description":"``POST /api/v1/conversations/{id}/messages`` body (Story 3.2).","properties":{"content":{"$ref":"#/components/schemas/SendMessageRequestContent"},"role":{"const":"user","default":"user","title":"Role","type":"string"}},"required":["content"],"title":"SendMessageRequest","type":"object"},"SendMessageRequestContent":{"description":"Sub-shape inside :class:`SendMessageRequest`.","properties":{"text":{"maxLength":20000,"minLength":1,"title":"Text","type":"string"}},"required":["text"],"title":"SendMessageRequestContent","type":"object"},"StudyChainLink":{"description":"One link in the rolled-up overnight-chain summary (feat_overnight_autopilot §8.3).","properties":{"auto_followup_depth_remaining":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Auto Followup Depth Remaining"},"baseline_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Baseline Metric"},"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"completed_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Completed At"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"delta_from_prev":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Delta From 
Prev"},"direction":{"enum":["maximize","minimize"],"title":"Direction","type":"string"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"proposal_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Proposal Id"},"selected_followup_kind":{"anyOf":[{"enum":["narrow_default","narrow","widen","swap_template"],"type":"string"},{"type":"null"}],"title":"Selected Followup Kind"},"status":{"enum":["queued","running","completed","cancelled","failed"],"title":"Status","type":"string"},"template_id":{"title":"Template Id","type":"string"}},"required":["id","name","status","best_metric","baseline_metric","direction","delta_from_prev","proposal_id","auto_followup_depth_remaining","failed_reason","created_at","completed_at","template_id"],"title":"StudyChainLink","type":"object"},"StudyChainResponse":{"description":"``GET /api/v1/studies/{id}/chain`` response (feat_overnight_autopilot §8.3).","properties":{"anchor_study_id":{"title":"Anchor Study Id","type":"string"},"best_link_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Best Link Id"},"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"cumulative_lift":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Cumulative Lift"},"direction":{"enum":["maximize","minimize"],"title":"Direction","type":"string"},"links":{"items":{"$ref":"#/components/schemas/StudyChainLink"},"title":"Links","type":"array"},"proposal_id_for_best_link":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Proposal Id For Best Link"},"stop_reason":{"enum":["depth_exhausted","no_lift","budget","parent_failed","cancelled","in_flight"],"title":"Stop Reason","type":"string"}},"required":["anchor_study_id","best_link_id","best_metric","cumulative_lift","direction","stop_reason","proposal_id_for_best_link","links"],"title":"StudyChainResponse","type":"object"},"StudyConfigSpec":{"description":"Wire 
shape of ``studies.config`` (write-side).\n\nThe model_validator below enforces that at least one stop condition is\nset — otherwise the study has no terminating condition (FR-4).\n``parallelism`` / ``trial_timeout_s`` are optional; when absent the\nworker reads ``Settings.studies_default_parallelism`` /\n``studies_default_timeout_s`` at job time. The API layer does NOT\nmaterialize these fields into the stored row — see Story 1.5 +\nStory 3.3's ``config.model_dump(exclude_none=True, exclude_unset=True)``\ncontract.","properties":{"auto_followup_depth":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Auto Followup Depth"},"auto_followup_strategy":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Auto Followup Strategy"},"baseline_params":{"anyOf":[{"additionalProperties":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"},{"type":"boolean"},{"type":"null"}]},"type":"object"},{"type":"null"}],"title":"Baseline Params"},"max_trials":{"anyOf":[{"maximum":100000.0,"minimum":1.0,"type":"integer"},{"type":"null"}],"title":"Max Trials"},"parallelism":{"anyOf":[{"maximum":64.0,"minimum":1.0,"type":"integer"},{"type":"null"}],"title":"Parallelism"},"pruner":{"anyOf":[{"enum":["median","none"],"type":"string"},{"type":"null"}],"title":"Pruner"},"sampler":{"anyOf":[{"enum":["tpe","random"],"type":"string"},{"type":"null"}],"title":"Sampler"},"secondary_metrics":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Secondary Metrics"},"seed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Seed"},"time_budget_min":{"anyOf":[{"exclusiveMinimum":0.0,"type":"number"},{"type":"null"}],"title":"Time Budget Min"},"trial_timeout_s":{"anyOf":[{"maximum":3600.0,"minimum":5.0,"type":"integer"},{"type":"null"}],"title":"Trial Timeout S"}},"title":"StudyConfigSpec","type":"object"},"StudyConvergenceShape":{"description":"Verdict + supporting numerics for the UI panel and the digest narrative.\n\nMirrors the ``ConfidenceShape`` pattern 
from ``confidence.py``: the\ndomain module owns the Pydantic model, and ``backend.app.api.v1.schemas``\nre-exports it for the ``StudyDetail.convergence`` field. The\n``best_so_far_curve`` is the chart's data series; ``verdict`` is the\nbadge label.\n\n**Name discipline (plan §0).** The bare class name ``ConvergenceShape``\nis already taken by :class:`backend.app.domain.study.confidence.ConvergenceShape`\n(a different concept — winner-trial *timing*, not metric plateau).\n``StudyConvergenceShape`` is the study-level analogue; the confidence\nsub-shape stays on its inner module. The two coexist on ``StudyDetail``\n(``confidence.convergence`` is the inner one; ``convergence`` is this\none), and FastAPI emits both under their bare class names in the\nOpenAPI schema — no fully-qualified disambiguation noise leaks to the\nfrontend.","properties":{"best_so_far_curve":{"items":{"$ref":"#/components/schemas/CurvePoint"},"title":"Best So Far Curve","type":"array"},"direction":{"enum":["maximize","minimize"],"title":"Direction","type":"string"},"epsilon":{"title":"Epsilon","type":"number"},"improvement_in_window":{"title":"Improvement In Window","type":"number"},"total_complete_trials":{"title":"Total Complete Trials","type":"integer"},"verdict":{"enum":["converged","still_improving","too_few_trials"],"title":"Verdict","type":"string"},"warmup_floor":{"title":"Warmup Floor","type":"integer"},"window_size":{"title":"Window Size","type":"integer"}},"required":["verdict","direction","window_size","epsilon","warmup_floor","total_complete_trials","improvement_in_window","best_so_far_curve"],"title":"StudyConvergenceShape","type":"object"},"StudyDetail":{"description":"``GET /api/v1/studies/{id}`` response + ``POST/cancel`` response.","properties":{"baseline_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Baseline Metric"},"baseline_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Baseline Trial 
Id"},"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"best_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Best Trial Id"},"cluster_id":{"title":"Cluster Id","type":"string"},"completed_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Completed At"},"confidence":{"anyOf":[{"$ref":"#/components/schemas/ConfidenceShape"},{"type":"null"}]},"config":{"additionalProperties":true,"title":"Config","type":"object"},"convergence":{"anyOf":[{"$ref":"#/components/schemas/StudyConvergenceShape"},{"type":"null"}]},"created_at":{"format":"date-time","title":"Created At","type":"string"},"failed_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Failed Reason"},"id":{"title":"Id","type":"string"},"judgment_list_id":{"title":"Judgment List Id","type":"string"},"name":{"title":"Name","type":"string"},"objective":{"additionalProperties":true,"title":"Objective","type":"object"},"optuna_study_name":{"title":"Optuna Study Name","type":"string"},"parent_study_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Study Id"},"query_set_id":{"title":"Query Set Id","type":"string"},"search_space":{"additionalProperties":true,"title":"Search Space","type":"object"},"started_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Started At"},"status":{"enum":["queued","running","completed","cancelled","failed"],"title":"Status","type":"string"},"target":{"title":"Target","type":"string"},"template_id":{"title":"Template 
Id","type":"string"},"trials_summary":{"$ref":"#/components/schemas/TrialsSummaryShape"}},"required":["id","name","cluster_id","target","template_id","query_set_id","judgment_list_id","search_space","objective","config","status","failed_reason","optuna_study_name","parent_study_id","baseline_metric","baseline_trial_id","best_metric","best_trial_id","created_at","started_at","completed_at","trials_summary"],"title":"StudyDetail","type":"object"},"StudyListResponse":{"description":"``GET /api/v1/studies`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/StudySummary"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"StudyListResponse","type":"object"},"StudySummary":{"description":"List-view shape.","properties":{"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"cluster_id":{"title":"Cluster Id","type":"string"},"completed_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Completed At"},"convergence_verdict":{"anyOf":[{"enum":["converged","still_improving","too_few_trials"],"type":"string"},{"type":"null"}],"title":"Convergence Verdict"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"direction":{"default":"maximize","enum":["maximize","minimize"],"title":"Direction","type":"string"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"status":{"enum":["queued","running","completed","cancelled","failed"],"title":"Status","type":"string"},"trial_count":{"default":0,"title":"Trial Count","type":"integer"}},"required":["id","name","cluster_id","status","best_metric","created_at","completed_at"],"title":"StudySummary","type":"object"},"Subsystems":{"description":"Per-subsystem reachability/configuration state. 
Wire values per spec §7.4.","properties":{"db":{"description":"Postgres reachability","enum":["ok","down"],"title":"Db","type":"string"},"elasticsearch":{"description":"Local Elasticsearch container reachability","enum":["reachable","unreachable"],"title":"Elasticsearch","type":"string"},"elasticsearch_clusters":{"$ref":"#/components/schemas/ClusterAggregateHealth","description":"Aggregate health of user-registered clusters (infra_adapter_elastic Story 3.5 / spec §2). registered=0 → all-zero counts; informational only — does NOT trigger overall `degraded`."},"openai":{"description":"OpenAI key + capability state. 'incapable' added per FR-2 vs. spec §7.4 enum table — see implementation_plan.md §13 Review log.","enum":["configured","missing_key","incapable"],"title":"Openai","type":"string"},"opensearch":{"description":"Local OpenSearch container reachability","enum":["reachable","unreachable"],"title":"Opensearch","type":"string"},"redis":{"description":"Redis reachability","enum":["ok","down"],"title":"Redis","type":"string"},"solr":{"default":"not_configured","description":"Local Apache Solr container reachability. 'not_configured' when SOLR_HOST is unset (operator opted out of running the Solr service). Added by infra_adapter_solr Story A10 / spec FR-12a.","enum":["reachable","unreachable","not_configured"],"title":"Solr","type":"string"}},"required":["db","redis","openai","elasticsearch","opensearch","elasticsearch_clusters"],"title":"Subsystems","type":"object"},"SwapTemplateFollowup":{"additionalProperties":false,"description":"A 'swap_template' followup — re-run against a different query template.\n\nCarries the LLM-proposed bounds for params shared with the parent template\nin ``search_space``. 
The digest worker calls\n:func:`backend.app.domain.study.template_swap.remap_search_space_for_swap_target`\nafter parsing to merge these bounds with heuristic defaults for any\nswap-target params not shared with the parent.\n\nOwner: ``feat_digest_executable_followups_swap_template`` (Tier B).","properties":{"kind":{"const":"swap_template","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"$ref":"#/components/schemas/SearchSpace"},"template_id":{"maxLength":36,"minLength":36,"title":"Template Id","type":"string"}},"required":["kind","rationale","template_id","search_space"],"title":"SwapTemplateFollowup","type":"object"},"TargetInfo":{"description":"One target (index / collection) on a cluster.","properties":{"doc_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Doc Count"},"name":{"title":"Name","type":"string"}},"required":["name"],"title":"TargetInfo","type":"object"},"TargetListResponse":{"description":"Response for ``GET /api/v1/clusters/{cluster_id}/targets`` (FR-1).\n\nUnpaginated by design — see feature_spec.md §7.1 \"pagination shape\nrationale\". 
The single-resource lookup pattern matches\n``/clusters/{id}/schema`` rather than the queryable ``/clusters`` list.\n``EntitySelectListPage``'s ``next_cursor`` and ``has_more`` fields\nare optional, so this bare ``data``-only shape consumes correctly on\nthe frontend without pretending to be a cursor endpoint.","properties":{"data":{"items":{"$ref":"#/components/schemas/TargetInfo"},"title":"Data","type":"array"}},"required":["data"],"title":"TargetListResponse","type":"object"},"TextFollowup":{"additionalProperties":false,"description":"A free-form textual suggestion — no auto-prefill, operator interprets.","properties":{"kind":{"const":"text","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"title":"Search Space","type":"null"}},"required":["kind","rationale"],"title":"TextFollowup","type":"object"},"TrialDetail":{"description":"``GET /api/v1/studies/{id}/trials`` response row.","properties":{"duration_ms":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Duration Ms"},"ended_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Ended At"},"error":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error"},"id":{"title":"Id","type":"string"},"is_baseline":{"default":false,"title":"Is Baseline","type":"boolean"},"metrics":{"additionalProperties":true,"title":"Metrics","type":"object"},"optuna_trial_number":{"title":"Optuna Trial Number","type":"integer"},"params":{"additionalProperties":true,"title":"Params","type":"object"},"primary_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Primary Metric"},"started_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Started At"},"status":{"enum":["complete","failed","pruned"],"title":"Status","type":"string"},"study_id":{"title":"Study 
Id","type":"string"}},"required":["id","study_id","optuna_trial_number","params","primary_metric","metrics","duration_ms","status","error","started_at","ended_at"],"title":"TrialDetail","type":"object"},"TrialListResponse":{"description":"``GET /api/v1/studies/{id}/trials`` response.","properties":{"data":{"items":{"$ref":"#/components/schemas/TrialDetail"},"title":"Data","type":"array"},"has_more":{"title":"Has More","type":"boolean"},"next_cursor":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Next Cursor"}},"required":["data","next_cursor","has_more"],"title":"TrialListResponse","type":"object"},"TrialsSummaryShape":{"description":"The ``trials_summary`` field embedded in :class:`StudyDetail`.","properties":{"best_primary_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Primary Metric"},"complete":{"title":"Complete","type":"integer"},"failed":{"title":"Failed","type":"integer"},"pruned":{"title":"Pruned","type":"integer"},"total":{"title":"Total","type":"integer"}},"required":["total","complete","failed","pruned","best_primary_metric"],"title":"TrialsSummaryShape","type":"object"},"UbiReadinessResponse":{"description":"``GET /api/v1/clusters/{cluster_id}/ubi-readiness`` response (FR-7).\n\n``covered_pairs_pct`` and ``head_covered`` are nullable — MVP2's\nrung classifier uses event-count thresholds (the SearchAdapter\nProtocol doesn't expose an exact ``_count`` endpoint). The fields\nare reserved on the wire so a future ``infra_adapter_count_method``\ncan fill them without breaking the contract. 
See\n:mod:`backend.app.services.ubi_readiness` for the rationale.","properties":{"checked_at":{"format":"date-time","title":"Checked At","type":"string"},"covered_pairs_pct":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Covered Pairs Pct"},"head_covered":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Head Covered"},"rung":{"enum":["rung_0","rung_1","rung_2","rung_3"],"title":"Rung","type":"string"}},"required":["rung","covered_pairs_pct","head_covered","checked_at"],"title":"UbiReadinessResponse","type":"object"},"UpdateQueryRequest":{"additionalProperties":false,"description":"``PATCH /api/v1/query-sets/{set_id}/queries/{query_id}`` body.\n\nWhole-object replace on ``query_metadata`` (NOT deep-merge); explicit\n``null`` removes a nullable field; omitted key = no change. Empty\nbody ``{}`` validates as a no-op (AC-28).\n\n``query_text`` is NOT NULL on the underlying table, so explicit-null\nis rejected by the ``@model_validator`` below (a 422 surfaces sooner\nthan the SQL ``NotNullViolation``).","properties":{"query_metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Query Metadata"},"query_text":{"anyOf":[{"maxLength":4000,"minLength":1,"type":"string"},{"type":"null"}],"title":"Query Text"},"reference_answer":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reference Answer"}},"title":"UpdateQueryRequest","type":"object"},"ValidationError":{"properties":{"ctx":{"title":"Context","type":"object"},"input":{"title":"Input"},"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"title":"Location","type":"array"},"msg":{"title":"Message","type":"string"},"type":{"title":"Error Type","type":"string"}},"required":["loc","msg","type"],"title":"ValidationError","type":"object"},"WidenFollowup":{"additionalProperties":false,"description":"A 'widen' followup — re-run with a broader range than the 
parent.","properties":{"kind":{"const":"widen","title":"Kind","type":"string"},"rationale":{"title":"Rationale","type":"string"},"search_space":{"$ref":"#/components/schemas/SearchSpace"}},"required":["kind","rationale","search_space"],"title":"WidenFollowup","type":"object"},"_ClusterEmbed":{"description":"Inline cluster summary on proposal responses.","properties":{"engine_type":{"title":"Engine Type","type":"string"},"environment":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Environment"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"}},"required":["id","name","engine_type"],"title":"_ClusterEmbed","type":"object"},"_DigestEmbed":{"description":"Inline digest summary on the proposal-detail response.\n\nfeat_digest_executable_followups Story 4.1 — ``suggested_followups`` is\nnow a discriminated-union list (see ``DigestResponse``).","properties":{"generated_at":{"format":"date-time","title":"Generated At","type":"string"},"id":{"title":"Id","type":"string"},"narrative":{"title":"Narrative","type":"string"},"parameter_importance":{"additionalProperties":{"type":"number"},"title":"Parameter Importance","type":"object"},"recommended_config":{"additionalProperties":true,"title":"Recommended Config","type":"object"},"suggested_followups":{"items":{"$ref":"#/components/schemas/FollowupItem"},"title":"Suggested Followups","type":"array"}},"required":["id","narrative","parameter_importance","recommended_config","suggested_followups","generated_at"],"title":"_DigestEmbed","type":"object"},"_SourceBreakdown":{"description":"Source-breakdown sub-shape on :class:`JudgmentListDetail`.\n\nEvolved 2026-05-29 by ``feat_ubi_judgments`` FR-10 — now three terms\n(``llm + human + click == judgment_count``). 
The cycle-2 F6\n\"click folds into human\" contract is superseded the moment UBI ships\nclick rows; the UI's source-breakdown card now renders all three\nbuckets separately so operators see the mix at a glance.","properties":{"click":{"title":"Click","type":"integer"},"human":{"title":"Human","type":"integer"},"llm":{"title":"Llm","type":"integer"}},"required":["llm","human","click"],"title":"_SourceBreakdown","type":"object"},"_StudySummary":{"description":"Inline study summary on the proposal-detail response.","properties":{"best_metric":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Best Metric"},"best_trial_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Best Trial Id"},"id":{"title":"Id","type":"string"},"judgment_list":{"additionalProperties":true,"title":"Judgment List","type":"object"},"name":{"title":"Name","type":"string"},"query_set":{"additionalProperties":true,"title":"Query Set","type":"object"},"status":{"title":"Status","type":"string"}},"required":["id","name","status","best_metric","best_trial_id","query_set","judgment_list"],"title":"_StudySummary","type":"object"},"_TemplateEmbed":{"description":"Inline template summary on proposal responses.","properties":{"engine_type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Engine Type"},"id":{"title":"Id","type":"string"},"name":{"title":"Name","type":"string"},"version":{"title":"Version","type":"integer"}},"required":["id","name","version"],"title":"_TemplateEmbed","type":"object"}}},"info":{"description":"Open-source automated relevance tuning for enterprise search platforms","title":"RelyLoop","version":"0.1.0"},"openapi":"3.1.0","paths":{"/api/v1/_test/auto-followup/seed-chain":{"post":{"description":"Test-only endpoint. Returns 404 unless `ENVIRONMENT=development`. Inserts a chain of `depth + 1` studies where each child carries the prior node's id as `parent_study_id`. 
The public POST /studies endpoint does NOT accept `parent_study_id` (it's set only by the auto-followup worker via `repo.create_study(parent_study_id=...)`), so this endpoint is the only way to drive deterministic E2E coverage of chain-panel parent-link / children-table / cascade-radio paths. Closes chore_auto_followup_e2e_chain_seed_helper.","operationId":"seed_auto_followup_chain_endpoint_api_v1__test_auto_followup_seed_chain_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedAutoFollowupChainRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedAutoFollowupChainResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Seed an auto-followup chain of N+1 linked studies","tags":["test-only"]}},"/api/v1/_test/demo/reseed":{"post":{"description":"Enqueues an Arq job that wipes the demo Postgres tables + ES/OS indices, then re-seeds the 4 demo scenarios from ``scripts/seed_meaningful_demos.py`` using REAL studies (real Optuna trials, real metrics per scenario). Returns 202 + an initial ``ReseedStatusResponse`` immediately; the frontend polls ``GET /api/v1/_test/demo/reseed/status`` for progress.\n\nPer ``bug_demo_reseed_fake_metric_regression``. Replaces the previous synchronous path that called ``/_test/studies/seed-completed`` and produced identical ``best_metric=0.487`` rows for every scenario.","operationId":"reseed_demo_api_v1__test_demo_reseed_post","responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReseedStatusResponse"}}},"description":"Successful Response"}},"summary":"Enqueue a demo-state reseed (dev-only, async)","tags":["test-only"]}},"/api/v1/_test/demo/reseed/status":{"get":{"description":"Returns the current reseed status from Redis. 
When no reseed has ever run (or the result TTL'd out), returns ``{status: 'idle'}`` rather than 404 so the frontend's polling loop is trivially safe.","operationId":"reseed_demo_status_api_v1__test_demo_reseed_status_get","responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReseedStatusResponse"}}},"description":"Successful Response"}},"summary":"Poll the current demo-reseed progress (dev-only)","tags":["test-only"]}},"/api/v1/_test/digests/{digest_id}":{"delete":{"description":"FR-2: Hard-delete the digest row. No FK children — no preflight needed.","operationId":"delete_test_digest_api_v1__test_digests__digest_id__delete","parameters":[{"in":"path","name":"digest_id","required":true,"schema":{"title":"Digest Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a digest (test-only)","tags":["test-only"]}},"/api/v1/_test/judgment-lists/{judgment_list_id}":{"delete":{"description":"FR-4 — hard-delete the judgment_list row.\n\nJudgments cascade-delete via existing FK. Preflight-checks ``studies``\n(non-cascade); 409 if any study references the judgment_list.","operationId":"delete_test_judgment_list_api_v1__test_judgment_lists__judgment_list_id__delete","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a judgment_list (test-only)","tags":["test-only"]}},"/api/v1/_test/proposals/{proposal_id}":{"delete":{"description":"FR-1: Hard-delete the proposal row. 
No FK children — no preflight needed.","operationId":"delete_test_proposal_api_v1__test_proposals__proposal_id__delete","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a proposal (test-only)","tags":["test-only"]}},"/api/v1/_test/query-sets/{query_set_id}":{"delete":{"description":"FR-5 — hard-delete the query_set row.\n\nQueries cascade-delete via existing FK. Preflight-checks ``studies``\n+ ``judgment_lists`` (both non-cascade); 409 with resource-specific\ncode if either references.","operationId":"delete_test_query_set_api_v1__test_query_sets__query_set_id__delete","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a query_set (test-only)","tags":["test-only"]}},"/api/v1/_test/query-templates/{template_id}":{"delete":{"description":"FR-6 — hard-delete the query_template row.\n\nNo FK children cascade with template. 
Preflight-checks ``studies``,\n``proposals``, and ``judgment_lists.current_template_id`` in\n**fixed priority order: STUDY > PROPOSAL > JUDGMENT_LIST** (per\nspec §FR-6) — first match wins.","operationId":"delete_test_query_template_api_v1__test_query_templates__template_id__delete","parameters":[{"in":"path","name":"template_id","required":true,"schema":{"title":"Template Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a query_template (test-only)","tags":["test-only"]}},"/api/v1/_test/studies/seed-completed":{"post":{"description":"Test-only endpoint. Returns 404 unless `ENVIRONMENT=development`. Inserts a study (driven through queued → running → completed via the legal state-machine transitions), 2 trials (one winner, one comparison), a digest, and optionally a pending proposal in a single transaction. Used by the Playwright E2E suite to cover the digest-panel surfaces (7 tooltip placements + AC-7 body content + AC-11 Open PR enabled/disabled branches) without waiting on the orchestrator + Optuna workers.","operationId":"seed_completed_study_api_v1__test_studies_seed_completed_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedCompletedStudyRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SeedCompletedStudyResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Seed a completed study + digest + (optional) pending proposal","tags":["test-only"]}},"/api/v1/_test/studies/{study_id}":{"delete":{"description":"FR-3 — hard-delete the study row.\n\nTrials cascade-delete via existing FK. 
Preflight-checks ``proposals``\n+ ``digests`` (both non-cascade); 409 if any dependent rows reference\nthe study.","operationId":"delete_test_study_api_v1__test_studies__study_id__delete","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Hard-delete a study (test-only)","tags":["test-only"]}},"/api/v1/clusters":{"get":{"description":"List clusters with cursor pagination + ``X-Total-Count`` header.\n\n``?q=`` is a Postgres FTS match against the cluster's ``search_vector``\n(name + base_url); 2–200 chars. Filter-only — ordering unchanged per\nspec FR-1. ``?sort=`` is one of the values in\n:data:`~backend.app.api.v1.schemas.ClusterSortKey`; the cursor is\nsort-aware so the keyset predicate matches the active ORDER BY\n(feat_data_table_primitive Stories 1.2 + 
1.3).","operationId":"list_clusters_api_v1_clusters_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","environment:asc","environment:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}},{"in":"query","name":"engine_type","required":false,"schema":{"anyOf":[{"enum":["elasticsearch","opensearch","solr"],"type":"string"},{"type":"null"}],"title":"Engine Type"}},{"in":"query","name":"environment","required":false,"schema":{"anyOf":[{"enum":["prod","staging","dev"],"type":"string"},{"type":"null"}],"title":"Environment"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Clusters","tags":["clusters"]},"post":{"description":"Register a cluster (FR-5 / AC-1).","operationId":"create_cluster_api_v1_clusters_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateClusterRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDetail"}}},"description":"Successful 
Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Cluster","tags":["clusters"]}},"/api/v1/clusters/test-connection":{"post":{"description":"Probe a cluster config WITHOUT persisting (infra_adapter_solr Story A9).\n\nPowers the registration modal's \"Test connection\" button. Always 200 —\ntransport failures surface as ``reachable=false`` with ``error`` set.\nInvalid engine×auth pairings 400 BEFORE the network call.","operationId":"test_connection_api_v1_clusters_test_connection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConnectionTestRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConnectionTestResult"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Test Connection","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}":{"delete":{"description":"Soft-delete a cluster (AC-8). 
Returns 204 with no body.","operationId":"delete_cluster_api_v1_clusters__cluster_id__delete","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Delete Cluster","tags":["clusters"]},"get":{"description":"Return cluster row + cached/fresh health probe.","operationId":"get_cluster_detail_api_v1_clusters__cluster_id__get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Cluster Detail","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/reprobe":{"post":{"description":"Re-run cluster capability probe (Story A9 / spec FR-2 + AC-14).\n\nConcurrent calls serialize on ``SELECT … FOR UPDATE``. 
On probe failure\nthe row's engine_config is NOT updated (the transaction rolls back).","operationId":"reprobe_cluster_api_v1_clusters__cluster_id__reprobe_post","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Reprobe Cluster","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/run_query":{"post":{"description":"Execute one query DSL fragment against the cluster (FR-6 / AC-3).","operationId":"run_query_api_v1_clusters__cluster_id__run_query_post","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}},{"in":"query","name":"timeout_s","required":false,"schema":{"default":5.0,"maximum":30.0,"minimum":1.0,"title":"Timeout S","type":"number"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunQueryRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunQueryResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Run Query","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/schema":{"get":{"description":"Return the field schema for ``target`` (FR-4 / AC-2).","operationId":"get_cluster_schema_api_v1_clusters__cluster_id__schema_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster 
Id","type":"string"}},{"in":"query","name":"target","required":true,"schema":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Schema"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Cluster Schema","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/targets":{"get":{"description":"List targets (indices/collections) on the cluster (FR-1 / AC-1).\n\nThin passthrough to ``ElasticAdapter.list_targets()`` (which filters out\nsystem indices whose names start with ``.``). Mirrors the ``get_cluster_schema``\npattern: ``get_cluster`` → ``acquire_adapter`` async context → adapter call\n→ translate exceptions via the ``_err()`` helper to the spec §7.5 envelope.\n\nError mapping:\n* cluster missing or soft-deleted → 404 ``CLUSTER_NOT_FOUND`` (retryable=false)\n* adapter raises ``TargetsForbiddenError`` (ACL 401/403) → 403\n ``TARGETS_FORBIDDEN`` (retryable=false) — frontend auto-engages manual mode\n* adapter raises ``ClusterUnreachableError`` (5xx / connection failure) → 503\n ``CLUSTER_UNREACHABLE`` (retryable=true)","operationId":"list_cluster_targets_api_v1_clusters__cluster_id__targets_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TargetListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Cluster Targets","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/targets/{target}/documents":{"get":{"description":"Paginated _id + truncated _source preview for a target (FR-3).\n\nThe endpoint asks the adapter for ``limit + 
1`` rows so it can detect\nend-of-data exactly (no extra round-trip). Only the first ``limit`` rows\nare returned; ``next_cursor`` encodes the ES ``hits[i].sort`` of the\nlast visible row when ``has_more`` is True. ``X-Total-Count`` header\ncarries the engine's ``hits.total.value``.","operationId":"list_target_documents_api_v1_clusters__cluster_id__targets__target__documents_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}},{"in":"path","name":"target","required":true,"schema":{"title":"Target","type":"string"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"maxLength":4096,"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":25,"maximum":100,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"fields","required":false,"schema":{"anyOf":[{"maxLength":2048,"type":"string"},{"type":"null"}],"title":"Fields"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DocumentListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Target Documents","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/targets/{target}/documents/{doc_id}":{"get":{"description":"Fetch one document by ``_id`` (FR-4).\n\nFastAPI's ``{doc_id:path}`` converter round-trips slashes verbatim, so\noperator IDs containing ``/`` are supported (D-17 / AC-16). 
Returns the\nadapter ``Document`` shape directly; on ``found: false`` returns 404\n``DOCUMENT_NOT_FOUND`` (distinct from ``TARGET_NOT_FOUND``).","operationId":"get_target_document_api_v1_clusters__cluster_id__targets__target__documents__doc_id__get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster Id","type":"string"}},{"in":"path","name":"target","required":true,"schema":{"title":"Target","type":"string"}},{"in":"path","name":"doc_id","required":true,"schema":{"title":"Doc Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Document"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Target Document","tags":["clusters"]}},"/api/v1/clusters/{cluster_id}/ubi-readiness":{"get":{"description":"Classify ``(cluster, query_set, target)`` on the UBI rung ladder.\n\nfeat_ubi_judgments FR-7.\n\nRequired query params: ``query_set_id`` + ``target`` (Spec FR-7 +\ncycle-3 D-10c: the endpoint MUST 422 without them — the classifier\ncan't compute a per-target rung without an application filter).\n\nError envelopes (all per spec §7.5):\n* ``404 CLUSTER_NOT_FOUND`` — cluster row missing or soft-deleted.\n* ``404 QUERY_SET_NOT_FOUND`` — query set row missing.\n* ``422 VALIDATION_ERROR`` — missing required query params (FastAPI's\n built-in handler, surfaces via ``api/errors.py``).\n* ``503 CLUSTER_UNREACHABLE`` — adapter cannot reach the cluster.\n\nThe result is cached for 60 s in Redis per\n``(cluster_id, query_set_id, target)`` so back-to-back dialog-open\nand dialog-submit calls don't re-probe.","operationId":"get_cluster_ubi_readiness_api_v1_clusters__cluster_id__ubi_readiness_get","parameters":[{"in":"path","name":"cluster_id","required":true,"schema":{"title":"Cluster 
Id","type":"string"}},{"in":"query","name":"query_set_id","required":true,"schema":{"maxLength":36,"minLength":1,"title":"Query Set Id","type":"string"}},{"in":"query","name":"target","required":true,"schema":{"maxLength":256,"minLength":1,"title":"Target","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UbiReadinessResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Cluster Ubi Readiness","tags":["clusters"]}},"/api/v1/config-repos":{"get":{"description":"Cursor-paginated config-repo list, newest first.","operationId":"list_config_repos_endpoint_api_v1_config_repos_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConfigReposListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Config Repos Endpoint","tags":["config-repos"]},"post":{"description":"Register a new config repo. ``provider`` is server-derived from ``repo_url``.\n\nPreflight order matches spec FR-3:\n\n1. ``validate_repo_url(repo_url)`` → 400 ``UNSUPPORTED_PROVIDER`` for\n non-GitHub URLs (AC-8). GitLab + Bitbucket arrive at MVP3.\n2. ``./secrets/{auth_ref}`` must exist → else 400 ``AUTH_REF_NOT_FOUND``\n (AC-9). The contents check defers to the worker — operators may\n populate the file between registration and first PR-open.\n3. ``name`` uniqueness check → 409 ``CONFIG_REPO_NAME_TAKEN`` on collision.\n4. 
Insert with server-derived ``provider=\"github\"``.\n5. **feat_github_webhook Story 4.2** — when ``webhook_secret_ref`` is\n populated, best-effort enqueue ``register_webhook`` against the\n newly created config_repo id. Enqueue failure (Redis down, pool\n absent, transient blip) does NOT break the 201 — it logs WARN\n and the operator drives recovery via the runbook.","operationId":"create_config_repo_endpoint_api_v1_config_repos_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateConfigRepoRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConfigRepoDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Config Repo Endpoint","tags":["config-repos"]}},"/api/v1/config-repos/{config_repo_id}":{"get":{"description":"Detail by id; 404 ``CONFIG_REPO_NOT_FOUND`` if missing.\n\nfeat_config_repo_baseline_tracking FR-4 — when\n``last_merged_proposal_id`` is set, embed the pointed-at proposal as a\n:class:`ProposalSummary` with ``is_currently_live=True``. 
The embed-side\nderivation uses the pointer context directly (NOT the generic\n``proposals → clusters → config_repos`` JOIN used elsewhere) so the\nbadge renders correctly even when the proposal's cluster was later\nunwired from this config_repo (spec §19 \"Cluster-with-config_repo-\nrotated\" decision-log entry).","operationId":"get_config_repo_endpoint_api_v1_config_repos__config_repo_id__get","parameters":[{"in":"path","name":"config_repo_id","required":true,"schema":{"title":"Config Repo Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConfigRepoDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Config Repo Endpoint","tags":["config-repos"]}},"/api/v1/conversations":{"get":{"description":"List conversations newest-first with per-row message_count + X-Total-Count header.\n\n``?since=`` (Story 1.5 — closes api-conventions.md drift) filters by\n``created_at >= since``. 
``?q=`` (Story 1.2) is a Postgres FTS match\nagainst ``search_vector`` (coalesce(title, '')); 2-200 chars.","operationId":"list_conversations_endpoint_api_v1_conversations_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConversationsListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Conversations Endpoint","tags":["conversations"]},"post":{"description":"Create a new conversation. 
Title is optional (FR-1 auto-generates from first message).","operationId":"create_conversation_endpoint_api_v1_conversations_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateConversationRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConversationSummary"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Conversation Endpoint","tags":["conversations"]}},"/api/v1/conversations/{conversation_id}":{"delete":{"description":"Soft-delete the conversation; subsequent reads return 404.","operationId":"delete_conversation_endpoint_api_v1_conversations__conversation_id__delete","parameters":[{"in":"path","name":"conversation_id","required":true,"schema":{"title":"Conversation Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Delete Conversation Endpoint","tags":["conversations"]},"get":{"description":"Return the conversation's full message history.","operationId":"get_conversation_endpoint_api_v1_conversations__conversation_id__get","parameters":[{"in":"path","name":"conversation_id","required":true,"schema":{"title":"Conversation Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ConversationDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Conversation Endpoint","tags":["conversations"]}},"/api/v1/conversations/{conversation_id}/messages":{"post":{"description":"Send a user message and stream the assistant turn as SSE.\n\nPreflight 
(in order; returns plain JSON envelope, NOT a partial stream):\n A. Conversation exists → else 404 ``CONVERSATION_NOT_FOUND``.\n B. ``Settings.openai_api_key`` populated → else 503 ``OPENAI_NOT_CONFIGURED``.\n C. Daily budget peek under cap → else 503 ``OPENAI_BUDGET_EXCEEDED``.\n\nSuccessful preflight returns a ``StreamingResponse(text/event-stream)``\ndriven by :func:`agent_chat.send_user_message`.","operationId":"post_message_endpoint_api_v1_conversations__conversation_id__messages_post","parameters":[{"in":"path","name":"conversation_id","required":true,"schema":{"title":"Conversation Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SendMessageRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Post Message Endpoint","tags":["conversations"]}},"/api/v1/judgment-lists":{"get":{"description":"List judgment lists, newest-first with cursor pagination.\n\n``?since=`` filters by ``created_at >= since`` (Story 1.5). ``?q=`` FTS\nmatch against ``search_vector`` (name + target). 
``?sort=`` is a\n:data:`JudgmentListSortKey` value with sort-aware cursor (Story 1.3).\n``?query_set_id`` / ``?cluster_id`` filter to lists belonging to the\nsupplied parent (``bug_judgment_lists_listing_ignores_query_set_filter``\n— required by the create-study modal's Step-2 dropdown so the user\ncan only pick judgment-lists valid for the chosen query-set + cluster;\nwithout these filters the modal returns all rows and the user can\npick a mismatched pair, which the ``POST /api/v1/studies`` cross-\nentity integrity check then rejects at create time with a confusing\n422 ``VALIDATION_ERROR: \"judgment_list query_set_id does not match\nstudy query_set_id\"``).\n\n``?target=`` filters by exact target index/collection name\n(``feat_study_target_judgment_mismatch_guard`` FR-2 — pairs with the\n``POST /studies`` ``JUDGMENT_TARGET_MISMATCH`` 422 so the create-study\nmodal can pre-filter the dropdown to only lists matching the chosen\nstudy target). Bounded by the ES/OpenSearch index-name ceiling\n(255 bytes).","operationId":"list_judgment_lists_endpoint_api_v1_judgment_lists_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","status:asc","status:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}},{"in":"query","name":"query_set_id","required":false,"schema":{"anyOf":[{"maxLength":36,"minLength":1,"type":"string"},{"type":"null"}],"title":"Query Set 
Id"}},{"in":"query","name":"cluster_id","required":false,"schema":{"anyOf":[{"maxLength":36,"minLength":1,"type":"string"},{"type":"null"}],"title":"Cluster Id"}},{"in":"query","name":"target","required":false,"schema":{"anyOf":[{"maxLength":255,"minLength":1,"type":"string"},{"type":"null"}],"title":"Target"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Judgment Lists Endpoint","tags":["judgments"]}},"/api/v1/judgment-lists/import":{"post":{"description":"Create a judgment_lists row with status='complete' + bulk-insert judgments.\n\nTutorial path; no OpenAI involvement. Every supplied judgment must\nreference a ``query_id`` that exists in ``body.query_set_id`` —\nmismatches → 400 ``QUERY_NOT_IN_SET``.","operationId":"import_judgment_list_api_v1_judgment_lists_import_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportJudgmentListRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Import Judgment List","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}":{"get":{"operationId":"get_judgment_list_endpoint_api_v1_judgment_lists__judgment_list_id__get","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListDetail"}}},"description":"Successful 
Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Judgment List Endpoint","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}/calibration":{"post":{"description":"Compute Cohen's + weighted kappa from supplied human samples.\n\nPairs are built by joining each sample with the existing\n``source='llm'`` judgment at ``(query_id, doc_id)`` — overridden rows\n(``source='human'``) are excluded (per spec FR-5 + GPT-5.5 cycle 1 F12).","operationId":"calibrate_judgment_list_api_v1_judgment_lists__judgment_list_id__calibration_post","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CalibrationSamplesRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CalibrationResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Calibrate Judgment List","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}/judgments":{"get":{"description":"List per-list judgments with cursor pagination.\n\n``?sort=`` is :data:`JudgmentRowSortKey` with sort-aware cursor\n(feat_data_table_primitive Story 1.3).","operationId":"list_judgments_endpoint_api_v1_judgment_lists__judgment_list_id__judgments_get","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List 
Id","type":"string"}},{"in":"query","name":"source","required":false,"schema":{"anyOf":[{"enum":["llm","human","click"],"type":"string"},{"type":"null"}],"title":"Source"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["created_at:asc","created_at:desc","rating:asc","rating:desc","source:asc","source:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentListJudgmentsResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Judgments Endpoint","tags":["judgments"]}},"/api/v1/judgment-lists/{judgment_list_id}/judgments/{judgment_id}":{"patch":{"description":"Replace an LLM rating with a human override (UPSERT-replace).","operationId":"override_judgment_api_v1_judgment_lists__judgment_list_id__judgments__judgment_id__patch","parameters":[{"in":"path","name":"judgment_list_id","required":true,"schema":{"title":"Judgment List Id","type":"string"}},{"in":"path","name":"judgment_id","required":true,"schema":{"title":"Judgment Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OverrideJudgmentRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/JudgmentRow"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Override 
Judgment","tags":["judgments"]}},"/api/v1/judgments/generate":{"post":{"description":"Create a judgment_lists row + enqueue the worker.\n\nDelegates the full preflight + INSERT + Arq enqueue to\n:func:`backend.app.services.agent_judgments_dispatch.start_judgment_generation`\nso the chat-agent ``generate_judgments_llm`` tool reuses the exact same\nchecks (no duplicated preflight). Wire behavior is identical — same error\ncodes, same status codes, same response shape.","operationId":"generate_judgments_api_v1_judgments_generate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateJudgmentListGenerateRequest"}}},"required":true},"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateJudgmentsResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Generate Judgments","tags":["judgments"]}},"/api/v1/judgments/generate-from-ubi":{"post":{"description":"Start a UBI-derived judgment generation job.\n\nDelegates to\n:func:`backend.app.services.agent_judgments_dispatch.start_ubi_judgment_generation`\nwhich runs the full FR-4 preflight (U-A..U-H) before INSERT + Arq\nenqueue. 
The Pydantic ``model_validator`` on\n:class:`CreateJudgmentListFromUbiRequest` already enforces the\nhybrid conditional (``current_template_id`` + ``rubric`` required\niff ``converter == 'hybrid_ubi_llm'``); the dispatcher trusts the\nvalidated request.","operationId":"generate_judgments_from_ubi_api_v1_judgments_generate_from_ubi_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateJudgmentListFromUbiRequest"}}},"required":true},"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateJudgmentsResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Generate Judgments From Ubi","tags":["judgments"]}},"/api/v1/proposals":{"get":{"description":"List proposals with cursor pagination + filters.\n\n``?template_id=`` (Story 1.5) filters by ``proposals.template_id`` FK;\n``?study_id=`` filters by ``proposals.study_id`` FK (used by the\nstudy-detail page's pending-proposal lookup). Both reject invalid\nUUIDs with 422 via FastAPI's UUID parsing. 
``?sort=`` (Story 1.3) is\na :data:`ProposalSortKey` value with sort-aware cursor.","operationId":"list_proposals_endpoint_api_v1_proposals_get","parameters":[{"in":"query","name":"status","required":false,"schema":{"anyOf":[{"enum":["pending","pr_opened","pr_merged","rejected"],"type":"string"},{"type":"null"}],"title":"Status"}},{"in":"query","name":"cluster_id","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cluster Id"}},{"in":"query","name":"source","required":false,"schema":{"anyOf":[{"enum":["study","manual"],"type":"string"},{"type":"null"}],"title":"Source"}},{"in":"query","name":"template_id","required":false,"schema":{"anyOf":[{"format":"uuid","type":"string"},{"type":"null"}],"title":"Template Id"}},{"in":"query","name":"study_id","required":false,"schema":{"anyOf":[{"format":"uuid","type":"string"},{"type":"null"}],"title":"Study Id"}},{"in":"query","name":"is_last_merged","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Last Merged"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["created_at:asc","created_at:desc","status:asc","status:desc","pr_state:asc","pr_state:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalsListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Proposals Endpoint","tags":["proposals"]},"post":{"description":"Manually create a proposal (chat-agent hand-crafted tweaks).\n\n``study_id`` and ``study_trial_id`` are NULL for manual 
proposals.\nValidates FK targets (cluster + template exist) before insert.","operationId":"create_manual_proposal_api_v1_proposals_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateProposalRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Manual Proposal","tags":["proposals"]}},"/api/v1/proposals/{proposal_id}":{"get":{"operationId":"get_proposal_endpoint_api_v1_proposals__proposal_id__get","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Proposal Endpoint","tags":["proposals"]}},"/api/v1/proposals/{proposal_id}/open_pr":{"post":{"description":"Enqueue the ``open_pr`` worker for an operator-approved proposal.\n\nDelegates the full preflight + Arq enqueue to\n:func:`backend.app.services.agent_proposals_dispatch.open_pr` so the\nchat-agent ``open_pr`` tool reuses the same checks. 
Wire behavior is\nidentical — same error codes, status codes, response shape.","operationId":"open_pr_endpoint_api_v1_proposals__proposal_id__open_pr_post","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"responses":{"202":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenPrResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Open Pr Endpoint","tags":["proposals"]}},"/api/v1/proposals/{proposal_id}/reject":{"post":{"description":"AC-5: ``pending → rejected`` transition; 409 INVALID_STATE_TRANSITION otherwise.","operationId":"reject_proposal_endpoint_api_v1_proposals__proposal_id__reject_post","parameters":[{"in":"path","name":"proposal_id","required":true,"schema":{"title":"Proposal Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RejectProposalRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProposalDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Reject Proposal Endpoint","tags":["proposals"]}},"/api/v1/query-sets":{"get":{"description":"List query sets with cursor pagination + X-Total-Count.\n\n``?q=`` is FTS match against ``search_vector`` (name). 
``?sort=`` is a\n:data:`QuerySetSortKey` value; cursor is sort-aware.","operationId":"list_query_sets_api_v1_query_sets_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QuerySetListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Query Sets","tags":["query-sets"]},"post":{"description":"Register a query set under a cluster (FR-3).","operationId":"create_query_set_api_v1_query_sets_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateQuerySetRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QuerySetDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Query Set","tags":["query-sets"]}},"/api/v1/query-sets/{query_set_id}":{"get":{"description":"Return a query set by id (includes 
``query_count``).","operationId":"get_query_set_detail_api_v1_query_sets__query_set_id__get","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QuerySetDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Query Set Detail","tags":["query-sets"]}},"/api/v1/query-sets/{query_set_id}/queries":{"get":{"description":"List per-query rows under a query set, with derived ``judgment_count``.","operationId":"list_queries_in_set_api_v1_query_sets__query_set_id__queries_get","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Queries In Set","tags":["query-sets"]},"post":{"description":"Bulk-add queries to a set (FR-3 + AC-8).\n\nDispatches on Content-Type:\n\n* ``application/json`` → :class:`BulkQueriesJsonRequest` Pydantic-parse.\n* ``text/csv`` → :func:`parse_queries_csv` (AC-8).\n\nOther content types → 415-equivalent surfaced as 400 ``INVALID_CSV``\n(the documented error code for content-type-mismatch in spec 
§7.5).","operationId":"bulk_add_queries_api_v1_query_sets__query_set_id__queries_post","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}}],"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/BulkQueriesResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Bulk Add Queries","tags":["query-sets"]}},"/api/v1/query-sets/{query_set_id}/queries/{query_id}":{"delete":{"description":"Hard-delete a query. FK-guarded — 409 if any judgment references it.","operationId":"delete_query_endpoint_api_v1_query_sets__query_set_id__queries__query_id__delete","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}},{"in":"path","name":"query_id","required":true,"schema":{"title":"Query Id","type":"string"}}],"responses":{"204":{"description":"Successful Response"},"409":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryHasJudgmentsEnvelope"}}},"description":"Conflict"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Delete Query Endpoint","tags":["query-sets"]},"patch":{"description":"Partial-update a query. 
Whole-object replace on ``query_metadata``.","operationId":"update_query_endpoint_api_v1_query_sets__query_set_id__queries__query_id__patch","parameters":[{"in":"path","name":"query_set_id","required":true,"schema":{"title":"Query Set Id","type":"string"}},{"in":"path","name":"query_id","required":true,"schema":{"title":"Query Id","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateQueryRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryRow"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Update Query Endpoint","tags":["query-sets"]}},"/api/v1/query-templates":{"get":{"description":"List query templates with cursor pagination + X-Total-Count header.\n\n``?q=`` FTS match (name). ``?sort=`` sort-aware cursor (Story 1.3).\n``?engine_type=`` filters by engine (Story 
1.4).","operationId":"list_query_templates_api_v1_query_templates_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","engine_type:asc","engine_type:desc","version:asc","version:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}},{"in":"query","name":"engine_type","required":false,"schema":{"anyOf":[{"enum":["elasticsearch","opensearch","solr"],"type":"string"},{"type":"null"}],"title":"Engine Type"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryTemplateListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Query Templates","tags":["query-templates"]},"post":{"description":"Register a query template (FR-2 + AC-7).\n\nAC-7: a body containing ``{{ os.system('rm -rf /') }}`` surfaces as\n400 ``INVALID_TEMPLATE_SYNTAX`` (the AST walk catches the ``Call``\nnode before reaching the meta-vars cross-check that would otherwise\nclassify ``os`` as 
``UndeclaredParamUsed``).","operationId":"create_query_template_api_v1_query_templates_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateQueryTemplateRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryTemplateDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Query Template","tags":["query-templates"]}},"/api/v1/query-templates/{template_id}":{"get":{"description":"Return a query template by id.","operationId":"get_query_template_detail_api_v1_query_templates__template_id__get","parameters":[{"in":"path","name":"template_id","required":true,"schema":{"title":"Template Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/QueryTemplateDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Query Template Detail","tags":["query-templates"]}},"/api/v1/studies":{"get":{"description":"List studies with cursor pagination + X-Total-Count.\n\n``?status=`` is typed as :data:`StudyStatusWire` so FastAPI returns\n422 ``VALIDATION_ERROR`` for unsupported values. ``?q=`` is a Postgres\nFTS match against ``search_vector`` (name + target). ``?sort=`` is a\n:data:`StudySortKey` value (``<field>:<dir>``); the cursor is\nsort-aware (feat_data_table_primitive Stories 1.2 + 1.3).\n\n``?target=`` (feat_index_document_browser FR-5) scopes the list to\nstudies targeting a single index/collection. 
Composes with all other\nfilters via AND.","operationId":"list_studies_api_v1_studies_get","parameters":[{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"status","required":false,"schema":{"anyOf":[{"enum":["queued","running","completed","cancelled","failed"],"type":"string"},{"type":"null"}],"title":"Status"}},{"in":"query","name":"cluster_id","required":false,"schema":{"anyOf":[{"maxLength":36,"minLength":1,"type":"string"},{"type":"null"}],"title":"Cluster Id"}},{"in":"query","name":"target","required":false,"schema":{"anyOf":[{"maxLength":256,"minLength":1,"type":"string"},{"type":"null"}],"title":"Target"}},{"in":"query","name":"q","required":false,"schema":{"anyOf":[{"maxLength":200,"minLength":2,"type":"string"},{"type":"null"}],"title":"Q"}},{"in":"query","name":"sort","required":false,"schema":{"anyOf":[{"enum":["name:asc","name:desc","created_at:asc","created_at:desc","completed_at:asc","completed_at:desc","best_metric:asc","best_metric:desc","status:asc","status:desc"],"type":"string"},{"type":"null"}],"title":"Sort"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Studies","tags":["studies"]},"post":{"description":"Create a study (FR-1 + AC-1) and enqueue the orchestrator 
job.","operationId":"create_study_api_v1_studies_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateStudyRequest"}}},"required":true},"responses":{"201":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Create Study","tags":["studies"]}},"/api/v1/studies/{study_id}":{"get":{"description":"Return a study by id (includes ``trials_summary``).","operationId":"get_study_detail_api_v1_studies__study_id__get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Study Detail","tags":["studies"]}},"/api/v1/studies/{study_id}/cancel":{"post":{"description":"Cancel a study (Story 2.3, FR-8 + AC-8/AC-9).\n\nOptionally cascades to in-flight chain children.\n\n``?cascade=true`` (default): routes through\n:func:`services.study_state.cancel_study_with_chain_cascade` —\ncancels the parent (if in-flight) AND recursively cancels in-flight\ndescendants. 
Tolerates terminal parents (recurses through completed\nintermediates to reach an in-flight grandchild).\n\n``?cascade=false``: routes through the original\n:func:`services.study_state.cancel_study` — single-study cancel,\npreserves the existing 409 error contract on terminal parents\n(AC-9 wire contract).","operationId":"cancel_study_api_v1_studies__study_id__cancel_post","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}},{"in":"query","name":"cascade","required":false,"schema":{"default":"true","title":"Cascade","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyDetail"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Cancel Study","tags":["studies"]}},"/api/v1/studies/{study_id}/chain":{"get":{"description":"Return the rolled-up chain summary for the study and its lineage (FR-3).\n\nWalks to the chain anchor, aggregates the completed-link subset into a\nbest link + cumulative lift + derived stop reason, and emits per-link\ndeltas. 
The anchor's ``delta_from_prev`` is always ``None`` (spec §8.3).\nReturns ``404 STUDY_NOT_FOUND`` when the study does not exist.","operationId":"get_study_chain_api_v1_studies__study_id__chain_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyChainResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Study Chain","tags":["studies"]}},"/api/v1/studies/{study_id}/children":{"get":{"description":"List direct child studies of a parent (FR-10 + D-13).\n\nReturns ``{\"data\": [], \"next_cursor\": null}`` for a study with no\nchildren — empty data array, NOT 404. 404 only fires when the parent\nstudy itself is missing.\n\nPer D-13 (direct-children-only): does NOT return transitive\ndescendants. 
The chain panel renders parent ↑ + direct children ↓;\noperators walk lineage one hop per page navigation.","operationId":"list_study_children_api_v1_studies__study_id__children_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StudyListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Study Children","tags":["studies"]}},"/api/v1/studies/{study_id}/digest":{"get":{"description":"Fetch the digest for a completed study.\n\nReturns 404 ``DIGEST_NOT_READY`` (``retryable=true``) when:\n- the study is not in ``status='completed'``, OR\n- the study is completed but the worker hasn't written the digest yet\n (worker lag, or a worker-side terminal failure like\n ``OPENAI_NOT_CONFIGURED`` deferred the run).","operationId":"get_study_digest_api_v1_studies__study_id__digest_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study Id","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DigestResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Get Study Digest","tags":["digests"]}},"/api/v1/studies/{study_id}/trials":{"get":{"description":"List trials in a study (FR-6).\n\nSort variants per spec §7.4: ``primary_metric_desc`` (default),\n``primary_metric_asc``, ``ended_at_desc``, ``ended_at_asc``,\n``optuna_trial_number_asc``.","operationId":"list_study_trials_api_v1_studies__study_id__trials_get","parameters":[{"in":"path","name":"study_id","required":true,"schema":{"title":"Study 
Id","type":"string"}},{"in":"query","name":"cursor","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cursor"}},{"in":"query","name":"limit","required":false,"schema":{"default":50,"maximum":200,"minimum":1,"title":"Limit","type":"integer"}},{"in":"query","name":"since","required":false,"schema":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"title":"Since"}},{"in":"query","name":"sort","required":false,"schema":{"default":"primary_metric_desc","title":"Sort","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TrialListResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"List Study Trials","tags":["trials"]}},"/healthz":{"get":{"description":"Probe each subsystem in parallel and return the documented JSON shape.\n\nArgs:\n settings: Application settings (DB URL, ES/OS URLs, OpenAI base URL, etc.)\n redis_client: Redis client for ping probe + capability-cache read\n es_client: shared httpx client for ES + OpenSearch HTTP probes\n db: Async DB session for the registered-clusters aggregate (Story 3.5)\n\nReturns:\n JSONResponse with the HealthResponse body and HTTP 200 (healthy) or 503 (degraded).","operationId":"healthz_healthz_get","responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}},"description":"Successful Response"},"503":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}},"description":"One or more required subsystems is down"}},"summary":"Healthz","tags":["operator"]}},"/webhooks/github":{"post":{"description":"Receive a single GitHub webhook delivery.\n\nReturns ``{\"status\": \"ok\", \"action\": }`` where\n``wire_action`` is one of the four values in\n:data:`WEBHOOK_ACTION_VALUES`.\n\nRaises:\n HTTPException(403, 
INVALID_SIGNATURE): bad signature or unknown\n repository. Both share one error code so the receiver does\n not reveal repo enumeration.","operationId":"github_webhook_webhooks_github_post","responses":{"200":{"content":{"application/json":{"schema":{"additionalProperties":{"type":"string"},"title":"Response Github Webhook Webhooks Github Post","type":"object"}}},"description":"Successful Response"}},"summary":"Github Webhook","tags":["webhooks"]}}}} diff --git a/ui/public/docs/tutorial-first-study.md b/ui/public/docs/tutorial-first-study.md index c7d45386..366b7d40 100644 --- a/ui/public/docs/tutorial-first-study.md +++ b/ui/public/docs/tutorial-first-study.md @@ -447,14 +447,37 @@ deterministically, and stops on its own when the lift plateaus. 1. Open the **Create study** wizard. Pick the **Deep (1000)** preset. 2. Set **🌙 Run overnight (compound automatically)** to **depth 3**. -3. Click **Create study** before you log off. -4. In the morning, open the study detail page. The **Overnight chain** +3. Pick a **Strategy** (see below). +4. Click **Create study** before you log off. +5. In the morning, open the study detail page. The **Overnight chain** panel summarises what ran, the cumulative lift across the chain, which link won, and why the chain stopped. -5. The summary points at a proposal — click it, review the diff, open the +6. The summary points at a proposal — click it, review the diff, open the PR. (You can also cancel any mid-chain study with `?cascade=true` (the default) to halt pending children.) +### Strategy — Refine vs. Try suggestions + +The new **Strategy** toggle (visible only after depth ≥ 1 is selected) +picks how each follow-up is chosen: + +- **Refine the same knobs (predictable)** — the safer default. Each + follow-up tightens the search space around the previous winner *on the + same template*. The chain hill-climbs one set of knobs deterministically. 
+ Use this when you trust the template + the parameters you're tuning and + you just want better numbers on them. +- **Try suggested follow-ups (broader exploration)** — each follow-up + acts on the parent digest's top runnable recommendation, which may + *widen* the bounds OR *swap* the template (e.g. from `multi-match` to + `function-score-decay`). A cycle guard prevents the chain from + ping-ponging between two templates. When the digest has no runnable + suggestion, the chain falls back to today's narrow behavior so it + never stalls. + +You'll see what each link did on the chain panel: a small `narrow ↓` / +`widen ↑` / `swapped to {template_name}` / `refined` badge next to each +study tells you the path the autopilot took. + **RelyLoop runs the exploration overnight unattended, but it never opens a PR on your behalf. The chain ends with a proposal you review and merge — your one decision.** diff --git a/ui/src/__tests__/components/studies/auto-followup-chain-panel.test.tsx b/ui/src/__tests__/components/studies/auto-followup-chain-panel.test.tsx index 47ae6269..e242b909 100644 --- a/ui/src/__tests__/components/studies/auto-followup-chain-panel.test.tsx +++ b/ui/src/__tests__/components/studies/auto-followup-chain-panel.test.tsx @@ -391,4 +391,96 @@ describe('AutoFollowupChainPanel', () => { // Whole panel hidden too (no other chain context). expect(screen.queryByTestId('auto-followup-chain-panel')).toBeNull(); }); + + // ------------------------------------------------------------------------- + // feat_overnight_final_solution Story 3.2 — per-link Strategy badge + // (AC-13: badge renders per kind; AC-14: no badge when all null). + // ------------------------------------------------------------------------- + + it('AC-13: renders the badge for each non-null selected_followup_kind', () => { + // 5-link chain: anchor (no badge) + 4 strategy kinds. 
+ setChain( + makeChain({ + anchor_study_id: 'L0', + best_link_id: 'L0', + cumulative_lift: 0.05, + direction: 'maximize', + stop_reason: 'no_lift', + links: [ + makeLink({ + id: 'L0', + name: 'Anchor', + selected_followup_kind: null, + template_id: 'tpl-anchor', + }), + makeLink({ + id: 'L1', + name: 'L1', + selected_followup_kind: 'narrow', + template_id: 'tpl-anchor', + }), + makeLink({ + id: 'L2', + name: 'L2', + selected_followup_kind: 'widen', + template_id: 'tpl-anchor', + }), + makeLink({ + id: 'L3', + name: 'L3', + selected_followup_kind: 'narrow_default', + template_id: 'tpl-anchor', + }), + makeLink({ + id: 'L4', + name: 'L4', + selected_followup_kind: 'swap_template', + template_id: 'tpl-swap-target-aaaaaaaa-aaaa-aaaa-aaaaaaaaaaaa', + }), + ], + }), + ); + const study = makeStudy({ + id: 'L1', + parent_study_id: 'L0', + config: { auto_followup_depth: 3 }, + }); + renderPanel({ study, chainChildren: [] }); + // Anchor (null kind) — no badge. + expect(screen.queryByTestId('chain-link-strategy-L0')).toBeNull(); + // narrow / widen / narrow_default — explicit labels. + expect(screen.getByTestId('chain-link-strategy-L1').textContent).toBe('narrow ↓'); + expect(screen.getByTestId('chain-link-strategy-L2').textContent).toBe('widen ↑'); + expect(screen.getByTestId('chain-link-strategy-L3').textContent).toBe('refined'); + // swap_template — falls back to a short id slice while the template + // fetch is pending (no msw handler registered in this test). + const swapBadge = screen.getByTestId('chain-link-strategy-L4').textContent ?? 
''; + expect(swapBadge.startsWith('swapped to ')).toBe(true); + }); + + it('AC-14: legacy chain with all-null selected_followup_kind renders no badges', () => { + setChain( + makeChain({ + anchor_study_id: 'L0', + best_link_id: 'L1', + cumulative_lift: 0.05, + direction: 'maximize', + stop_reason: 'no_lift', + links: [ + makeLink({ id: 'L0', name: 'Anchor', selected_followup_kind: null }), + makeLink({ id: 'L1', name: 'L1', selected_followup_kind: null }), + makeLink({ id: 'L2', name: 'L2', selected_followup_kind: null }), + ], + }), + ); + const study = makeStudy({ + id: 'L1', + parent_study_id: 'L0', + config: { auto_followup_depth: 2 }, + }); + renderPanel({ study, chainChildren: [] }); + expect(screen.queryByTestId('chain-link-strategy-L0')).toBeNull(); + expect(screen.queryByTestId('chain-link-strategy-L1')).toBeNull(); + expect(screen.queryByTestId('chain-link-strategy-L2')).toBeNull(); + }); }); diff --git a/ui/src/__tests__/components/studies/create-study-modal.overnight-strategy.test.tsx b/ui/src/__tests__/components/studies/create-study-modal.overnight-strategy.test.tsx new file mode 100644 index 00000000..e8428991 --- /dev/null +++ b/ui/src/__tests__/components/studies/create-study-modal.overnight-strategy.test.tsx @@ -0,0 +1,303 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * feat_overnight_final_solution Story 1.2 — wizard Strategy toggle. + * + * AC-4: toggle hidden when auto_followup_depth = 0/Off; visible with + * `"narrow"` default when depth >= 1. + * AC-5: submit with strategy="follow_suggestions" → POST body has both + * config.auto_followup_depth and config.auto_followup_strategy. + * Backward-compat: depth>=1 without explicit toggle change → wire value + * `"narrow"` (so the validator's pair-rule is satisfied and the worker + * dispatches the legacy path). 
+ * + * Mirrors the test patterns in + * create-study-modal.auto-followup.test.tsx (the existing depth selector + * tests) — reuses the same shadcn-select mock + walkToStep5 helper shape. + */ + +import { http, HttpResponse } from 'msw'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { fireEvent, render, screen, waitFor } from '@testing-library/react'; +import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; +import type { ReactNode } from 'react'; + +import { TooltipProvider } from '@/components/ui/tooltip'; + +import { server } from '../../setup'; + +vi.mock('@/components/ui/select', async () => { + const { mockShadcnSelect } = await import('../../helpers/shadcn-select-mock'); + return mockShadcnSelect(); +}); + +vi.mock('sonner', () => ({ + toast: Object.assign(vi.fn(), { error: vi.fn(), success: vi.fn() }), + Toaster: () => null, +})); + +const { CreateStudyModal } = await import('@/components/studies/create-study-modal'); + +const API_BASE = 'http://api.test'; + +function wrap(node: ReactNode) { + const qc = new QueryClient({ defaultOptions: { queries: { retry: false } } }); + return render( + + {node} + , + ); +} + +interface PostBody { + config?: { + auto_followup_depth?: unknown; + auto_followup_strategy?: unknown; + [key: string]: unknown; + }; + [key: string]: unknown; +} + +function mockBackend() { + const postBodies: PostBody[] = []; + server.use( + http.get(`${API_BASE}/api/v1/clusters`, () => + HttpResponse.json( + { + data: [ + { + id: 'c1', + name: 'local-es', + engine_type: 'elasticsearch', + environment: 'dev', + base_url: 'http://localhost:9200', + auth_kind: 'es_apikey', + created_at: '2026-05-12T00:00:00Z', + health_check: { + status: 'green', + version: '9.4.0', + checked_at: '2026-05-12T00:00:00Z', + error: null, + }, + }, + ], + next_cursor: null, + has_more: false, + }, + { headers: { 'X-Total-Count': '1' } }, + ), + ), + http.get(`${API_BASE}/api/v1/clusters/c1/schema`, () => HttpResponse.json({ 
fields: [] })), + http.get(`${API_BASE}/api/v1/clusters/c1/targets`, () => + HttpResponse.json({ data: [{ name: 'products', doc_count: 42 }] }), + ), + http.get(`${API_BASE}/api/v1/query-sets`, () => + HttpResponse.json( + { + data: [{ id: 'qs1', name: 'demo', query_count: 5, created_at: '2026-05-12T00:00:00Z' }], + next_cursor: null, + has_more: false, + }, + { headers: { 'X-Total-Count': '1' } }, + ), + ), + http.get(`${API_BASE}/api/v1/judgment-lists`, () => + HttpResponse.json( + { + data: [ + { + id: 'jl1', + name: 'demo', + status: 'complete', + source: 'llm', + created_at: '2026-05-12T00:00:00Z', + }, + ], + next_cursor: null, + has_more: false, + }, + { headers: { 'X-Total-Count': '1' } }, + ), + ), + http.get(`${API_BASE}/api/v1/query-templates`, () => + HttpResponse.json( + { + data: [ + { + id: 'tpl1', + name: 'T1', + engine_type: 'elasticsearch', + version: 1, + created_at: '2026-05-12T00:00:00Z', + declared_params: { boost_title: 'float' }, + }, + ], + next_cursor: null, + has_more: false, + }, + { headers: { 'X-Total-Count': '1' } }, + ), + ), + http.get(`${API_BASE}/api/v1/query-templates/tpl1`, () => + HttpResponse.json({ + id: 'tpl1', + name: 'T1', + engine_type: 'elasticsearch', + body: '{}', + declared_params: { boost_title: 'float' }, + version: 1, + parent_id: null, + created_at: '2026-05-12T00:00:00Z', + }), + ), + http.post(`${API_BASE}/api/v1/studies`, async ({ request }) => { + const body = (await request.json()) as PostBody; + postBodies.push(body); + return HttpResponse.json({ id: 'st1', name: 'demo', status: 'queued' }); + }), + ); + return { postBodies }; +} + +async function walkToStep5(): Promise { + await waitFor(() => expect(screen.getByRole('option', { name: /local-es/ })).toBeInTheDocument()); + fireEvent.change(screen.getByLabelText('Cluster'), { target: { value: 'c1' } }); + await waitFor(() => + expect(screen.queryAllByRole('option', { name: /products/ }).length).toBeGreaterThan(0), + ); + 
fireEvent.change(screen.getByLabelText('Target index / collection'), { + target: { value: 'products' }, + }); + fireEvent.click(screen.getByTestId('step-next')); + await waitFor(() => expect(screen.getByTestId('step-2')).toBeInTheDocument()); + await waitFor(() => + expect(screen.queryAllByRole('option', { name: 'demo' }).length).toBeGreaterThan(0), + ); + fireEvent.change(screen.getByLabelText('Query set'), { target: { value: 'qs1' } }); + await waitFor(() => { + expect(screen.queryAllByRole('option', { name: 'demo' }).length).toBeGreaterThanOrEqual(2); + }); + fireEvent.change(screen.getByLabelText('Judgment list'), { target: { value: 'jl1' } }); + fireEvent.click(screen.getByTestId('step-next')); + await waitFor(() => expect(screen.getByTestId('step-3')).toBeInTheDocument()); + await waitFor(() => expect(screen.getAllByRole('option').length).toBeGreaterThan(0)); + fireEvent.change(screen.getByLabelText('Query template (filtered by engine)'), { + target: { value: 'tpl1' }, + }); + fireEvent.click(screen.getByTestId('step-next')); + await waitFor(() => expect(screen.getByTestId('step-4')).toBeInTheDocument()); + fireEvent.change(screen.getByLabelText('Study name'), { + target: { value: 'overnight-strategy-test' }, + }); + fireEvent.click(screen.getByTestId('step-next')); + await waitFor(() => expect(screen.getByTestId('step-5')).toBeInTheDocument()); +} + +function getDepthSelect(): HTMLSelectElement { + return screen.getByTestId('cs-auto-followup') as HTMLSelectElement; +} + +function queryStrategySelect(): HTMLSelectElement | null { + return screen.queryByTestId('cs-overnight-strategy') as HTMLSelectElement | null; +} + +describe('CreateStudyModal — overnight Strategy toggle (Story 1.2, FR-2)', () => { + afterEach(() => server.resetHandlers()); + + // AC-4 (hidden): depth=0 means the toggle is not in the DOM at all. 
+ it('AC-4: Strategy toggle is NOT rendered when auto_followup_depth = Off (0)', async () => { + mockBackend(); + wrap( {}} />); + await walkToStep5(); + + expect(getDepthSelect().value).toBe('0'); + expect(queryStrategySelect()).toBeNull(); + }); + + // AC-4 (visible w/ default): depth becomes >= 1 → toggle appears with + // "narrow" selected by default so the wire contract is the safe legacy + // behavior unless the operator opts in. + it('AC-4: Strategy toggle appears with "narrow" default when depth becomes >= 1', async () => { + mockBackend(); + wrap( {}} />); + await walkToStep5(); + + fireEvent.change(getDepthSelect(), { target: { value: '3' } }); + await waitFor(() => expect(getDepthSelect().value).toBe('3')); + + const strategy = queryStrategySelect(); + expect(strategy).not.toBeNull(); + expect(strategy!.value).toBe('narrow'); + }); + + // AC-4 (hide on revert): depth back to Off hides the toggle in the + // same render cycle. + it('AC-4: Strategy toggle disappears when depth returns to Off', async () => { + mockBackend(); + wrap( {}} />); + await walkToStep5(); + + fireEvent.change(getDepthSelect(), { target: { value: '2' } }); + await waitFor(() => expect(queryStrategySelect()).not.toBeNull()); + + fireEvent.change(getDepthSelect(), { target: { value: '0' } }); + await waitFor(() => expect(getDepthSelect().value).toBe('0')); + expect(queryStrategySelect()).toBeNull(); + }); + + // AC-5: explicit follow_suggestions opt-in → submit payload carries + // both config keys. 
+ it('AC-5: submit with depth=3 + strategy=follow_suggestions → POST body has both keys', async () => { + const { postBodies } = mockBackend(); + wrap( {}} />); + await walkToStep5(); + + fireEvent.change(getDepthSelect(), { target: { value: '3' } }); + await waitFor(() => expect(queryStrategySelect()).not.toBeNull()); + fireEvent.change(queryStrategySelect()!, { target: { value: 'follow_suggestions' } }); + await waitFor(() => expect(queryStrategySelect()!.value).toBe('follow_suggestions')); + + fireEvent.click(screen.getByRole('button', { name: /Create study/i })); + + await waitFor(() => expect(postBodies.length).toBeGreaterThan(0)); + expect(postBodies[0]!.config?.auto_followup_depth).toBe(3); + expect(postBodies[0]!.config?.auto_followup_strategy).toBe('follow_suggestions'); + }); + + // Backward-compat default: depth>=1 with no toggle change → wire value + // "narrow". The validator's pair-rule requires the strategy be set when + // depth>=1; sending "narrow" preserves the legacy worker path while + // satisfying the contract. + it('submit with depth=2 (default strategy) → POST body has auto_followup_strategy="narrow"', async () => { + const { postBodies } = mockBackend(); + wrap( {}} />); + await walkToStep5(); + + fireEvent.change(getDepthSelect(), { target: { value: '2' } }); + await waitFor(() => expect(getDepthSelect().value).toBe('2')); + + fireEvent.click(screen.getByRole('button', { name: /Create study/i })); + + await waitFor(() => expect(postBodies.length).toBeGreaterThan(0)); + expect(postBodies[0]!.config?.auto_followup_depth).toBe(2); + expect(postBodies[0]!.config?.auto_followup_strategy).toBe('narrow'); + }); + + // depth=Off → omit both keys (legacy backward-compat, byte-identical + // wire shape to pre-feature studies). 
+ it('submit with depth=Off → POST body omits both auto_followup_depth and auto_followup_strategy', async () => { + const { postBodies } = mockBackend(); + wrap( {}} />); + await walkToStep5(); + + expect(getDepthSelect().value).toBe('0'); + fireEvent.click(screen.getByRole('button', { name: /Create study/i })); + + await waitFor(() => expect(postBodies.length).toBeGreaterThan(0)); + const config = postBodies[0]!.config ?? {}; + expect('auto_followup_depth' in config).toBe(false); + expect('auto_followup_strategy' in config).toBe(false); + }); +}); diff --git a/ui/src/__tests__/lib/enums-overnight-strategy-discipline.test.ts b/ui/src/__tests__/lib/enums-overnight-strategy-discipline.test.ts new file mode 100644 index 00000000..a86220fd --- /dev/null +++ b/ui/src/__tests__/lib/enums-overnight-strategy-discipline.test.ts @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * feat_overnight_final_solution Story 1.2 — enum value-lock. + * + * The backend Pydantic field `StudyConfigSpec.auto_followup_strategy` is + * `str | None` (NOT `Literal[...]`) per spec D-13 — the enum tuple at + * backend/app/api/v1/schemas.py `AUTO_FOLLOWUP_STRATEGY_VALUES` is the + * source of truth that both the backend validator AND this frontend mirror + * cite. Drift on either side trips this lock or the backend pair at + * backend/tests/contract/test_studies_api_contract.py. 
+ */ + +import { describe, expect, it } from 'vitest'; + +import { OVERNIGHT_STRATEGY_VALUES, type OvernightStrategy } from '@/lib/enums'; + +describe('OVERNIGHT_STRATEGY_VALUES', () => { + it('contains exactly the two strategies in canonical order', () => { + expect(OVERNIGHT_STRATEGY_VALUES.length).toBe(2); + expect(OVERNIGHT_STRATEGY_VALUES).toEqual(['narrow', 'follow_suggestions']); + }); + + it('type alias narrows to the union of canonical values', () => { + const strategy: OvernightStrategy = 'narrow'; + expect(strategy).toBe('narrow'); + const broader: OvernightStrategy = 'follow_suggestions'; + expect(broader).toBe('follow_suggestions'); + }); +}); diff --git a/ui/src/__tests__/lib/enums-selected-followup-kind-discipline.test.ts b/ui/src/__tests__/lib/enums-selected-followup-kind-discipline.test.ts new file mode 100644 index 00000000..80d2089b --- /dev/null +++ b/ui/src/__tests__/lib/enums-selected-followup-kind-discipline.test.ts @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * feat_overnight_final_solution Story 3.2 — enum value-lock. + * + * The backend source-of-truth is the + * `SELECTED_FOLLOWUP_KIND_VALUES` tuple at + * backend/app/domain/study/auto_followup_strategy.py — drift on either + * side trips this lock or the backend pair at + * backend/tests/contract/test_studies_chain_contract.py. 
+ */ + +import { describe, expect, it } from 'vitest'; + +import { SELECTED_FOLLOWUP_KIND_VALUES, type SelectedFollowupKind } from '@/lib/enums'; + +describe('SELECTED_FOLLOWUP_KIND_VALUES', () => { + it('contains exactly the four kinds in canonical order', () => { + expect(SELECTED_FOLLOWUP_KIND_VALUES.length).toBe(4); + expect(SELECTED_FOLLOWUP_KIND_VALUES).toEqual([ + 'narrow_default', + 'narrow', + 'widen', + 'swap_template', + ]); + }); + + it('type alias narrows to the union of canonical values', () => { + const k: SelectedFollowupKind = 'narrow_default'; + expect(k).toBe('narrow_default'); + const k2: SelectedFollowupKind = 'swap_template'; + expect(k2).toBe('swap_template'); + }); +}); diff --git a/ui/src/__tests__/lib/glossary.test.ts b/ui/src/__tests__/lib/glossary.test.ts index 260650c2..9883dc15 100644 --- a/ui/src/__tests__/lib/glossary.test.ts +++ b/ui/src/__tests__/lib/glossary.test.ts @@ -116,6 +116,27 @@ describe('feat_digest_executable_followups Story 5.3 — followup glossary keys' }); }); +describe('feat_overnight_final_solution Story 1.2 — overnight_strategy glossary key (AC-16)', () => { + it('overnight_strategy entry exists with short + long', () => { + expect(glossary['overnight_strategy'], 'glossary[overnight_strategy] missing').toBeDefined(); + expect(glossary['overnight_strategy']?.short).toBeTruthy(); + expect(glossary['overnight_strategy']?.long).toBeTruthy(); + }); + + it('short ≤ 120 chars and contains both wire values verbatim', () => { + const entry = glossary['overnight_strategy']; + expect(entry).toBeDefined(); + const short = entry!.short!; + // Spec FR-9 requires ≤ 120 — tightened from the relaxed 140-char + // limit after GPT-5.5 final review (F3). + expect(short.length).toBeLessThanOrEqual(120); + // AC-16 — the two wire values must appear verbatim in `short` so the + // frontend mapping never drifts silently from the backend allowlist. 
+ expect(short).toContain('"narrow"'); + expect(short).toContain('"follow_suggestions"'); + }); +}); + describe('glossary content shape (FR-5)', () => { it('every `short` field is ≤140 characters', () => { for (const [key, entry] of Object.entries(glossary)) { diff --git a/ui/src/components/studies/auto-followup-chain-panel.tsx b/ui/src/components/studies/auto-followup-chain-panel.tsx index 678d1666..f006637c 100644 --- a/ui/src/components/studies/auto-followup-chain-panel.tsx +++ b/ui/src/components/studies/auto-followup-chain-panel.tsx @@ -15,6 +15,7 @@ import { type StudyDetail, type StudySummary, } from '@/lib/api/studies'; +import { useTemplate } from '@/lib/api/query-templates'; export interface AutoFollowupChainPanelProps { study: StudyDetail; @@ -56,6 +57,65 @@ function formatDelta(value: number | null | undefined): string { return `${value >= 0 ? '+' : ''}${value.toFixed(4)}`; } +/** + * feat_overnight_final_solution Story 3.2 / FR-7 — per-link Strategy badge. + * + * Renders nothing when `link.selected_followup_kind` is null (anchor, or + * any chain under the legacy "narrow" strategy per D-12). When set, maps + * the wire kind to a compact label: + * + * - "narrow_default" → "refined" (follow_suggestions fallback path — + * operator picked suggestions but the autopilot had nothing + * executable to run; the "refined" badge is the audit signal). + * - "narrow" → "narrow ↓" (digest's narrow suggestion was run). + * - "widen" → "widen ↑" (digest's widen suggestion was run). + * - "swap_template" → "swapped to {short_name}" — resolved via a per- + * link GET /api/v1/query-templates/{link.template_id} fetch (per + * D-11 / OQ-1 resolution; chain payload is kept stable). Falls back + * to a 6-char id prefix while the fetch is pending or errors. + * + * Values must match backend/app/domain/study/auto_followup_strategy.py + * SELECTED_FOLLOWUP_KIND_VALUES. 
+ */ +function ChainLinkStrategyBadge({ + link, +}: { + link: StudyChainResponse['links'][number]; +}): React.ReactNode { + const kind = link.selected_followup_kind; + // Hooks must run unconditionally; we ALWAYS call useTemplate but pass + // null for non-swap links so it stays disabled (the hook's `enabled` + // gate handles the null id). + const templateQ = useTemplate(kind === 'swap_template' ? link.template_id : null); + if (!kind) return null; + let label: string; + if (kind === 'narrow_default') { + label = 'refined'; + } else if (kind === 'narrow') { + label = 'narrow ↓'; + } else if (kind === 'widen') { + label = 'widen ↑'; + } else { + // swap_template — show the swap target's short name. Truncate + // long names to 30 chars so the badge stays compact. + const fullName = templateQ.data?.name; + const truncated = fullName + ? fullName.length > 30 + ? `${fullName.slice(0, 30)}…` + : fullName + : link.template_id.slice(0, 6); + label = `swapped to ${truncated}`; + } + return ( + + {label} + + ); +} + /** * Auto-followup chain panel (feat_auto_followup_studies Story 3.1, FR-10 * frontend; extended by feat_overnight_autopilot FR-4). @@ -203,6 +263,7 @@ export function AutoFollowupChainPanel({ ? link.best_metric.toFixed(4) : '—'} {delta && ({delta})} + ); })} diff --git a/ui/src/components/studies/create-study-modal.tsx b/ui/src/components/studies/create-study-modal.tsx index 93f08a26..aef86ad0 100644 --- a/ui/src/components/studies/create-study-modal.tsx +++ b/ui/src/components/studies/create-study-modal.tsx @@ -49,11 +49,13 @@ import { OBJECTIVE_DIRECTION_VALUES, OBJECTIVE_K_VALUES, OBJECTIVE_METRIC_VALUES, + OVERNIGHT_STRATEGY_VALUES, PRUNER_VALUES, SAMPLER_VALUES, type ObjectiveDirection, type ObjectiveK, type ObjectiveMetric, + type OvernightStrategy, type PrunerKind, type SamplerKind, } from '@/lib/enums'; @@ -161,6 +163,12 @@ interface FormValues { // the wire, which the backend treats as "off"). 
Wire-`0` is reserved for // the worker's decrement path per FR-1 + D-12 — the wizard never sends it. auto_followup_depth?: 0 | 1 | 2 | 3 | 4 | 5; + // feat_overnight_final_solution Story 1.2 / FR-2 — strategy toggle. + // Visible only when `auto_followup_depth >= 1`. Default `'narrow'` when + // visible. Wire values mirror `OVERNIGHT_STRATEGY_VALUES` from `enums.ts` + // (source-of-truth: backend/app/api/v1/schemas.py + // AUTO_FOLLOWUP_STRATEGY_VALUES). + auto_followup_strategy?: OvernightStrategy; } const STEP_TITLES = [ @@ -713,6 +721,9 @@ export function CreateStudyModal({ open, onOpenChange, initialValues }: CreateSt pruner?: PrunerKind; seed?: number; auto_followup_depth?: number; + // feat_overnight_final_solution Story 1.2 / FR-2 — Strategy wire field, + // written only when auto_followup_depth >= 1 (pair-rule). + auto_followup_strategy?: OvernightStrategy; }; const config: ConfigSpec = {}; if (typeof values.max_trials === 'number') config.max_trials = values.max_trials; @@ -727,6 +738,13 @@ export function CreateStudyModal({ open, onOpenChange, initialValues }: CreateSt // the worker's decrement-to-terminal path per FR-1 + D-12. if (typeof values.auto_followup_depth === 'number' && values.auto_followup_depth > 0) { config.auto_followup_depth = values.auto_followup_depth; + // feat_overnight_final_solution Story 1.2 / FR-2 — write the strategy + // ONLY when depth >= 1 (the backend pair-rule validator at + // schemas.py:_validate_auto_followup_strategy would 422 otherwise). + // Defaults to `'narrow'` for byte-identical legacy behavior; the + // wizard toggle replaces this value when the operator opts in to + // `'follow_suggestions'`. + config.auto_followup_strategy = values.auto_followup_strategy ?? 'narrow'; } setSubmitting(true); @@ -1468,10 +1486,23 @@ export function CreateStudyModal({ open, onOpenChange, initialValues }: CreateSt value={String(values.auto_followup_depth ?? 
0)} onValueChange={(v: string) => { const n = Number.parseInt(v, 10); + const wasOff = + values.auto_followup_depth === undefined || values.auto_followup_depth === 0; if (n === 0) { form.setValue('auto_followup_depth', undefined); + // feat_overnight_final_solution F1 (GPT-5.5 final review) + // — clear the strategy when the toggle hides so the + // next reveal deterministically starts from the + // safe "narrow" default rather than a stale value. + form.setValue('auto_followup_strategy', undefined); } else { form.setValue('auto_followup_depth', n as 1 | 2 | 3 | 4 | 5); + // F1 reset: when transitioning Off (0/undefined) → ≥ 1 + // the spec FR-2 says the toggle defaults to "narrow" + // whenever it becomes visible. + if (wasOff) { + form.setValue('auto_followup_strategy', 'narrow'); + } } }} > @@ -1493,6 +1524,46 @@ export function CreateStudyModal({ open, onOpenChange, initialValues }: CreateSt you still open every PR by hand.

+ {/* + feat_overnight_final_solution Story 1.2 / FR-2 — strategy toggle. + Visible only when auto_followup_depth >= 1 (pair-rule enforced by the backend + validator _validate_auto_followup_strategy in backend/app/api/v1/schemas.py). + Source-of-truth: backend/app/api/v1/schemas.py AUTO_FOLLOWUP_STRATEGY_VALUES. + */} + {typeof values.auto_followup_depth === 'number' && + values.auto_followup_depth >= 1 && ( +
+
+ + +
+ +

+ Refine: each follow-up tightens around the previous winner on the same knobs. + Try suggestions: each follow-up acts on the digest’s top runnable + recommendation, which may switch knobs or templates. Refine is the safer + default; Try suggestions explores broader. +

+
+ )}
)} diff --git a/ui/src/lib/enums.ts b/ui/src/lib/enums.ts index 2fcdf11c..557c494a 100644 --- a/ui/src/lib/enums.ts +++ b/ui/src/lib/enums.ts @@ -81,6 +81,32 @@ export const CONVERGENCE_VERDICT_VALUES = [ ] as const; export type ConvergenceVerdict = (typeof CONVERGENCE_VERDICT_VALUES)[number]; +// Values must match backend/app/api/v1/schemas.py AUTO_FOLLOWUP_STRATEGY_VALUES. +// feat_overnight_final_solution Story 1.1 / D-13 — the backend Pydantic field is +// `str | None` (NOT a Literal) so the canonical AUTO_FOLLOWUP_STRATEGY_INVALID +// error envelope works; the backend tuple is the source of truth that both the +// backend validator and this frontend mirror cite. Value-lock vitest at +// ui/src/__tests__/lib/enums-overnight-strategy-discipline.test.ts asserts the +// exact array contents AND order. +export const OVERNIGHT_STRATEGY_VALUES = ['narrow', 'follow_suggestions'] as const; +export type OvernightStrategy = (typeof OVERNIGHT_STRATEGY_VALUES)[number]; + +// Values must match backend/app/domain/study/auto_followup_strategy.py SELECTED_FOLLOWUP_KIND_VALUES. +// feat_overnight_final_solution Story 3.2 / FR-6 — mirrors the additive +// `selected_followup_kind` field on StudyChainLink. `narrow_default` marks +// the follow_suggestions fallback path (operator picked suggestions but +// the autopilot fell back); the legacy/default narrow path persists no +// key at all per D-12 (the API field is null, no badge rendered). +// Value-lock vitest at +// ui/src/__tests__/lib/enums-selected-followup-kind-discipline.test.ts. +export const SELECTED_FOLLOWUP_KIND_VALUES = [ + 'narrow_default', + 'narrow', + 'widen', + 'swap_template', +] as const; +export type SelectedFollowupKind = (typeof SELECTED_FOLLOWUP_KIND_VALUES)[number]; + // Values must match backend/app/api/v1/schemas.py ObjectiveMetric. // ERR@k is deferred to MVP2 per infra_optuna_eval feature_spec.md §3 / §FR-3 / §13; // add it back here when scoring.py SUPPORTED_METRICS grows the entry. 
diff --git a/ui/src/lib/glossary.ts b/ui/src/lib/glossary.ts index 572fe407..b0ed21aa 100644 --- a/ui/src/lib/glossary.ts +++ b/ui/src/lib/glossary.ts @@ -937,6 +937,25 @@ export const glossary = { ].join('\n'), ariaLabel: 'More information about the overnight autopilot', }, + // feat_overnight_final_solution Story 1.2 / FR-9 — new key for the Strategy + //