Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions backend/app/api/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@
{
# feat_auto_followup_studies Story 1.1 — StudyConfigSpec.auto_followup_depth
"AUTO_FOLLOWUP_DEPTH_OUT_OF_RANGE",
# feat_overnight_final_solution Story 1.1 — StudyConfigSpec.auto_followup_strategy
# Covers both the value-rule and pair-rule (depth ≥ 1) failures, plus
# the worker-managed-key reject (auto_followup_visited_template_ids
# / auto_followup_selected_kind set by an operator at create time).
"AUTO_FOLLOWUP_STRATEGY_INVALID",
}
)

Expand Down
108 changes: 108 additions & 0 deletions backend/app/api/v1/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,56 @@ class StudyConfigSpec(BaseModel):
carry ``AUTO_FOLLOWUP_DEPTH_OUT_OF_RANGE`` per spec §8.5 (the prefix
parser in :mod:`backend.app.api.errors` picks up the ``<CODE>:``
prefix from the raised ValueError message)."""
auto_followup_strategy: str | None = Field(default=None)
"""feat_overnight_final_solution FR-1 + D-13: ``"narrow"`` | ``"follow_suggestions"``
| ``None`` (treated as ``"narrow"`` by the worker).

**Field type is ``str | None`` (NOT ``Literal[...]``)** — per spec D-13,
a field-level ``Literal`` would surface bad values as Pydantic's generic
``VALIDATION_ERROR`` envelope BEFORE the ``mode="after"`` validator
could emit the canonical ``AUTO_FOLLOWUP_STRATEGY_INVALID`` code. Same
pattern as ``auto_followup_depth`` above: enum check + pair rule done
in :meth:`_validate_auto_followup_strategy` via the ``<CODE>:`` prefix
convention so :func:`backend.app.api.errors.validation_exception_handler`
unwraps the canonical envelope. The two accepted values are exposed as
the module-level :data:`AUTO_FOLLOWUP_STRATEGY_VALUES` tuple (consumed
by the CI source-of-truth grep gate and mirrored as
``OVERNIGHT_STRATEGY_VALUES`` in ``ui/src/lib/enums.ts``)."""

@model_validator(mode="before")
@classmethod
def _reject_worker_managed_keys(cls, data: object) -> object:
    """Refuse raw payloads that try to set worker-managed config keys (D-14).

    The keys ``auto_followup_visited_template_ids`` and
    ``auto_followup_selected_kind`` are meant to be written only by the
    autopilot worker on chain children. Letting the wizard seed them
    would break the single-writer rule for the cycle-guard list and
    could spoof badges on the chain panel.

    Why a ``mode="before"`` validator: ``StudyConfigSpec`` relies on the
    Pydantic default ``extra="ignore"``, so unknown keys are silently
    dropped before any ``mode="after"`` validator runs. Inspecting the
    raw dict here is what lets the keys be rejected with the canonical
    ``AUTO_FOLLOWUP_STRATEGY_INVALID`` envelope.

    Why not ``extra="forbid"`` on the whole model: that would widen the
    blast radius and reject any future config key during rollout (a
    stored config re-validated through this model in a worker would
    fail).

    Args:
        data: The raw, pre-validation input handed over by Pydantic.

    Returns:
        ``data`` unchanged when it is not a dict, or when it is a dict
        carrying neither worker-managed key.

    Raises:
        ValueError: If ``data`` is a dict containing one of the
            worker-managed keys. The message carries the
            ``AUTO_FOLLOWUP_STRATEGY_INVALID:`` prefix and names the
            first offending key (in declaration order).
    """
    # Only mapping payloads can carry the forbidden keys; anything else
    # is handed back untouched.
    if not isinstance(data, dict):
        return data

    worker_managed = (
        "auto_followup_visited_template_ids",
        "auto_followup_selected_kind",
    )
    # First worker-managed key present in the payload, or None.
    key = next((name for name in worker_managed if name in data), None)
    if key is not None:
        raise ValueError(
            f"AUTO_FOLLOWUP_STRATEGY_INVALID: config.{key} is worker-managed "
            "and may not be set at study creation"
        )
    return data

@model_validator(mode="after")
def _require_one_stop_condition(self) -> StudyConfigSpec:
Expand Down Expand Up @@ -748,6 +798,42 @@ def _validate_auto_followup_depth(self) -> StudyConfigSpec:
)
return self

@model_validator(mode="after")
def _validate_auto_followup_strategy(self) -> StudyConfigSpec:
    """Check ``auto_followup_strategy`` against the enum and pair rules.

    feat_overnight_final_solution FR-1 + D-13. An unset strategy
    (``None``) is always accepted. When a strategy is set, two rules
    apply in this order:

    1. The value must be a member of
       :data:`AUTO_FOLLOWUP_STRATEGY_VALUES`.
    2. ``auto_followup_depth`` must be set and ``>= 1`` (a strategy
       choice on a depth-0 study is meaningless).

    Both failures are reported with the ``AUTO_FOLLOWUP_STRATEGY_INVALID``
    code via the ``<CODE>:`` message-prefix convention (allowlisted in
    :data:`backend.app.api.errors._CUSTOM_ERROR_CODE_ALLOWLIST`).

    Returns:
        ``self``, unchanged, when validation passes.

    Raises:
        ValueError: If the strategy value is not recognised, or if a
            strategy is set while the depth is ``None`` or below 1.
    """
    if self.auto_followup_strategy is not None:
        # Rule (a): the value must be one of the known wire values.
        if self.auto_followup_strategy not in AUTO_FOLLOWUP_STRATEGY_VALUES:
            raise ValueError(
                "AUTO_FOLLOWUP_STRATEGY_INVALID: config.auto_followup_strategy "
                f"must be 'narrow' or 'follow_suggestions'; "
                f"got {self.auto_followup_strategy!r}"
            )
        # Rule (b): a strategy only makes sense on a chain of depth >= 1.
        depth = self.auto_followup_depth
        if depth is None or depth < 1:
            raise ValueError(
                "AUTO_FOLLOWUP_STRATEGY_INVALID: config.auto_followup_strategy "
                "only applies when config.auto_followup_depth >= 1"
            )
    return self


# feat_overnight_final_solution Story 1.1 / D-13 — wire-value source of truth
# for ``StudyConfigSpec.auto_followup_strategy``. Mirrored by the frontend
# ``OVERNIGHT_STRATEGY_VALUES`` in ``ui/src/lib/enums.ts`` and consumed by
# the CI grep gate at ``scripts/ci/verify_enum_source_of_truth.sh``. Keep
# this declaration module-level (NOT inside the class) so the grep gate's
# AST resolver finds the bare tuple assignment.
AUTO_FOLLOWUP_STRATEGY_VALUES: tuple[str, ...] = ("narrow", "follow_suggestions")


class ParentFollowupRef(BaseModel):
"""Optional lineage payload on ``POST /api/v1/studies``.
Expand Down Expand Up @@ -883,6 +969,28 @@ class StudyChainLink(BaseModel):
failed_reason: str | None
created_at: datetime
completed_at: datetime | None
template_id: str
"""``studies.template_id`` β€” needed by the chain panel's swap_template
badge so the frontend can resolve the target template's display name
via ``GET /api/v1/query-templates/{id}``. Added by Story 3.1 per
P1-B5 (the badge is otherwise not buildable from the chain payload
alone). Non-optional β€” every study has a template."""
selected_followup_kind: Literal["narrow_default", "narrow", "widen", "swap_template"] | None = (
None
)
"""feat_overnight_final_solution Story 3.1 / FR-6 β€” the path
:func:`backend.app.workers.auto_followup.enqueue_followup_study` took
when creating this link. ``null`` for the anchor (no parent
follow-up to consume) and for every link created under the legacy
``"narrow"`` strategy (per D-12 the legacy path persists no
``auto_followup_selected_kind`` key). The chain endpoint applies a
defensive coercion before populating this field: an unknown JSONB
value in ``studies.config.auto_followup_selected_kind`` (manual DB
INSERT, schema drift) coerces to ``null`` + a
``chain_selected_kind_unknown`` WARN — never raises a Pydantic
``ValidationError`` that would 500 the endpoint. Mirrored
character-for-character by ``ui/src/lib/enums.ts SELECTED_FOLLOWUP_KIND_VALUES``
(Story 3.2)."""


class StudyChainResponse(BaseModel):
Expand Down
27 changes: 27 additions & 0 deletions backend/app/api/v1/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from datetime import datetime
from typing import Annotated, Any

import structlog
import uuid_utils
from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response, status
from pydantic import ValidationError
Expand All @@ -62,6 +63,9 @@
from backend.app.db import repo
from backend.app.db.models import Study
from backend.app.db.session import get_db
from backend.app.domain.study.auto_followup_strategy import (
SELECTED_FOLLOWUP_KIND_VALUES,
)
from backend.app.domain.study.chain_summary import (
_direction_normalized_delta_from_prev,
compute_cumulative_lift,
Expand All @@ -84,6 +88,7 @@
)
from backend.app.services.study_preflight import MIN_OVERLAP, probe_judgment_overlap

logger = structlog.get_logger(__name__)
router = APIRouter()

DEFAULT_PAGE_LIMIT = 50
Expand Down Expand Up @@ -863,6 +868,26 @@ async def get_study_chain(
if not link_entries
else _direction_normalized_delta_from_prev(lk.best_metric, prev_metric, link_direction)
)
# feat_overnight_final_solution Story 3.1 / FR-6 — defensive
# coercion for the new selected_followup_kind field. studies.config
# is JSONB with no CHECK; a malformed value (manual INSERT, schema
# drift, future version row read by an older deploy) must NOT
# surface as a Pydantic ValidationError that 500s the endpoint.
# Mirrors the parse_followup_list defensive-ingest contract for
# digests.suggested_followups. Per spec D-12, legacy/default
# chains write no key at all, so the absent case is the COMMON
# path here — only unknown non-None values trigger the WARN.
raw_selected_kind = lk.config.get("auto_followup_selected_kind")
selected_kind: str | None = (
raw_selected_kind if raw_selected_kind in SELECTED_FOLLOWUP_KIND_VALUES else None
)
if raw_selected_kind is not None and raw_selected_kind not in SELECTED_FOLLOWUP_KIND_VALUES:
logger.warning(
"chain selected_followup_kind has unknown value; coerced to null",
event_type="chain_selected_kind_unknown",
study_id=lk.id,
raw_value=str(raw_selected_kind)[:64],
)
link_entries.append(
StudyChainLink(
id=lk.id,
Expand All @@ -877,6 +902,8 @@ async def get_study_chain(
failed_reason=lk.failed_reason,
created_at=lk.created_at,
completed_at=lk.completed_at,
template_id=lk.template_id,
selected_followup_kind=selected_kind,
)
)
prev_metric = lk.best_metric
Expand Down
186 changes: 186 additions & 0 deletions backend/app/domain/study/auto_followup_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# SPDX-FileCopyrightText: 2026 soundminds.ai
#
# SPDX-License-Identifier: Apache-2.0

"""Pure-domain selector for the autopilot's ``follow_suggestions`` strategy.

Owner: ``feat_overnight_final_solution`` Story 2.1.

When :data:`backend.app.db.models.study.Study.config` carries
``auto_followup_strategy = "follow_suggestions"``, the autopilot worker
(:mod:`backend.app.workers.auto_followup`) consumes the parent's persisted
digest follow-ups instead of always running the ±50% narrow on the same
template. This module is the pure-domain selector that walks the digest's
``suggested_followups`` list, filters to executable kinds, applies the
cycle guard (no ``swap_template`` whose target is already in
``parent.config.auto_followup_visited_template_ids``), and returns a
:class:`SelectionOutcome` carrying everything the worker needs for both
the dispatch decision AND the telemetry it must emit afterwards
(``source_index``, ``candidate_count``, ``dropped_template_ids``).

**Pure** — no DB, no I/O, no async. Deterministic: same input → same
output. Unit-testable without fixtures.

**Always returns a ``SelectionOutcome``** (never ``None``). The
"no executable candidate" case is encoded as ``selected is None`` so the
fallback-event telemetry can still carry ``dropped_template_ids`` for
diagnostics — when every executable item was a ``swap_template`` to an
already-visited template, the operator immediately sees "the chain wanted
to ping-pong but the guard fired" from one log line.

Spec: ``docs/00_overview/planned_features/02_mvp2/feat_overnight_final_solution/feature_spec.md``
(FR-4 + spec FR-3 dispatch + cycle 1 finding C1-A2 + cycle 2 finding C2-A1).
"""

from __future__ import annotations

from dataclasses import dataclass

from backend.app.domain.study.followups import (
FollowupItem,
NarrowFollowup,
SwapTemplateFollowup,
TextFollowup,
WidenFollowup,
)

# feat_overnight_final_solution Story 2.1 / FR-6 — wire-value source of
# truth for ``StudyChainLink.selected_followup_kind``. Mirrored by the
# frontend ``SELECTED_FOLLOWUP_KIND_VALUES`` in ``ui/src/lib/enums.ts``
# (added by Story 3.2). Consumed by the CI grep gate at
# ``scripts/ci/verify_enum_source_of_truth.sh``.
#
# ``"narrow_default"`` marks a chain link the worker took via the narrow
# fallback path under the ``follow_suggestions`` strategy — distinct from
# the legacy/default narrow path (which persists NO ``auto_followup_selected_kind``
# key at all, per D-12).
SELECTED_FOLLOWUP_KIND_VALUES: tuple[str, ...] = (
"narrow_default",
"narrow",
"widen",
"swap_template",
)


@dataclass(frozen=True, slots=True)
class SelectionOutcome:
"""The result of :func:`select_executable_followup`.

``selected`` is ``None`` when no executable candidate remained after
the cycle-guard filter β€” the worker dispatches the fallback-to-narrow
path in that case. ``dropped_template_ids`` is **always** populated
with the cycle-guard-dropped ``SwapTemplateFollowup.template_id``
values (sorted ascending for deterministic telemetry) β€” even when
``selected is None``, so the fallback event carries the same
drop-diagnostics as a successful selection.
"""

selected: FollowupItem | None
"""The executable follow-up to dispatch, or ``None`` to fall back."""

source_index: int | None
"""0-based index of the selected item in the ORIGINAL ``followups`` list
(not in the post-filter list), so telemetry can correlate with the
digest's persisted order. ``None`` when ``selected is None``."""

candidate_count: int
"""Count of executable items in contention AFTER cycle-guard filtering.
``0`` when no executable item remained."""

dropped_template_ids: list[str]
"""Cycle-guard-dropped ``SwapTemplateFollowup.template_id`` values,
sorted ascending. Empty when no swap_template was dropped (e.g. the
digest had only narrow/widen executables, or only text)."""


def select_executable_followup(
    followups: list[FollowupItem],
    visited_template_ids: set[str],
) -> SelectionOutcome:
    """Pick the first executable follow-up for the autopilot to dispatch.

    Makes a single pass over ``followups`` in order. An item is skipped
    when it is:

    * a :class:`~backend.app.domain.study.followups.TextFollowup`
      (no ``search_space`` — nothing to run), or
    * a :class:`~backend.app.domain.study.followups.SwapTemplateFollowup`
      whose ``template_id`` is in ``visited_template_ids`` (the cycle
      guard — prevents template ping-pong); its ``template_id`` is
      recorded as dropped, or
    * not one of the narrow / widen / swap_template follow-up types.

    The earliest surviving item wins. The digest's list is trusted to be
    already ordered (convergence-aware ordering per
    ``prompts/digest_narrative.system.md``), so there is no re-ranking
    here (D-5).

    The cycle guard is **template-based, NOT search-space-based** (D-9):
    ``narrow`` / ``widen`` items are never checked against the visited
    set; only ``swap_template`` items are, and only by ``template_id``.

    The function is total: when nothing executable survives it still
    returns a :class:`SelectionOutcome` (``selected=None``,
    ``source_index=None``, ``candidate_count=0``) carrying the dropped
    IDs, so the worker's fallback telemetry can tell "digest was
    text-heavy" apart from "all executables were cycle-dropped".

    Args:
        followups: The parent digest's ``suggested_followups`` list,
            already parsed by
            :func:`backend.app.domain.study.followups.parse_followup_list`.
            May be empty.
        visited_template_ids: Templates already visited in this chain,
            built by the worker from
            ``parent.config.get("auto_followup_visited_template_ids", [parent.template_id])``.
            The worker does NOT add the prospective child template
            before calling — the cycle guard looks backward only (D-9).

    Returns:
        A :class:`SelectionOutcome` holding the chosen item (or ``None``),
        its index in the original list, the number of executable
        survivors, and the sorted cycle-dropped template IDs.
    """
    cycle_dropped: list[str] = []
    chosen: FollowupItem | None = None
    chosen_index: int | None = None
    executable_total = 0

    for position, followup in enumerate(followups):
        # Text items carry no search_space, so they are never executable.
        if isinstance(followup, TextFollowup):
            continue

        if isinstance(followup, SwapTemplateFollowup):
            # Cycle guard: a swap back to an already-visited template is
            # dropped and remembered for telemetry.
            if followup.template_id in visited_template_ids:
                cycle_dropped.append(followup.template_id)
                continue
        elif not isinstance(followup, (NarrowFollowup, WidenFollowup)):
            # Anything that is not narrow / widen / swap_template is
            # ignored.
            continue

        # The item survived both filters: count it, and keep only the
        # earliest one as the selection (trusting the digest's order, D-5).
        executable_total += 1
        if chosen_index is None:
            chosen = followup
            chosen_index = position

    # When nothing survived, chosen/chosen_index are still None and the
    # count is 0, which is exactly the fallback outcome.
    return SelectionOutcome(
        selected=chosen,
        source_index=chosen_index,
        candidate_count=executable_total,
        dropped_template_ids=sorted(cycle_dropped),
    )


# Public API of this module: the selected-kind wire values, the selection
# result record, and the selector function.
__all__ = [
"SELECTED_FOLLOWUP_KIND_VALUES",
"SelectionOutcome",
"select_executable_followup",
]
Loading
Loading