class RecentChainSummary(BaseModel):
    """One row in the ``GET /api/v1/studies/chains/recent`` response.

    Per spec §8.1 (feat_overnight_studies_summary_card). Per-chain
    rollup feeding the "Ran while you were away" card on ``/studies``
    — anchor identity + chain length + the best link's metric + the
    chain's cumulative lift + the derived stop reason + the
    surfaceable proposal id for the best link. Read-only; no state
    transitions, no audit events.
    """

    # Id of the chain's first link (``traversal.links[0]``).
    anchor_study_id: str
    # Display name of that same anchor link.
    anchor_name: str
    chain_length: int
    """``len(traversal.links)`` — guaranteed ``>= 2`` by the discovery
    repo (FR-1)."""
    best_metric: float | None
    """``None`` when every link's ``best_metric IS NULL`` (e.g. a
    terminal-failed chain). The card renders the stop-reason phrase in
    place of the numeric line on this path (AC-11)."""
    # NOTE(review): the "Best : " example in the text below looks like it
    # lost its placeholders (possibly "Best <metric>: <value>") — confirm
    # against the spec before relying on the wording.
    objective_metric: str
    """``traversal.links[0].objective.get('metric')`` — surfaced so the
    card can render "Best : " without an extra request."""
    cumulative_lift: float | None
    """Direction-normalized lift via
    :func:`backend.app.domain.study.chain_summary.compute_cumulative_lift`.
    ``None`` when the completed-link subset is empty OR no baseline is
    derivable (mirrors the chain panel's null-lift contract)."""
    # Optimisation direction taken from the anchor's objective; the row
    # builder falls back to "maximize" for a missing or unrecognised value.
    direction: ObjectiveDirection
    stop_reason: ChainStopReason
    """Derived per spec §9. Values must match
    backend/app/domain/study/chain_summary.py CHAIN_STOP_REASONS."""
    best_link_proposal_id: str | None
    """The selected (newest non-rejected) proposal for the best link,
    surfaced so the card's "Review chain" link can deep-link directly
    to the proposal when one exists."""
    tail_completed_at: datetime
    """``traversal.links[-1].completed_at`` — the chain tail's terminal
    timestamp. Drives the card's localStorage dismissal cutoff
    (``max(tail_completed_at) + 1ms`` per FR-5)."""
def _recent_chain_row(traversal: repo.ChainTraversalResult) -> RecentChainSummary:
    """Build one row of the recent-chains response from a chain traversal.

    Uses the same ``select_best_link`` / ``compute_cumulative_lift`` /
    ``derive_chain_stop_reason`` helpers, the same anchor-direction
    lookup and the same proposal-id-by-link-id map as
    :func:`get_study_chain`. A shared helper is deliberately NOT
    extracted, per Plan §5 ("bounded shared-helper extraction only") —
    the two derivation blocks render different response shapes, and one
    row pulls only a subset of what ``get_study_chain`` emits.

    Args:
        traversal: Resolved chain. ``links[0]`` is used as the anchor and
            ``links[-1]`` as the tail.

    Returns:
        The populated :class:`RecentChainSummary`. ``best_metric`` and
        ``best_link_proposal_id`` are ``None`` when ``select_best_link``
        yields no link.

    Raises:
        The 500 ``INTERNAL_ERROR`` error built by ``_err`` when the tail
        link has no ``completed_at``.
    """
    anchor = traversal.links[0]
    tail = traversal.links[-1]

    # The discovery repo only selects terminal candidates with a non-null
    # ``completed_at``, so a null tail timestamp is not expected here. The
    # explicit guard (rather than an ``assert``, which ruff S101 forbids in
    # production code) narrows ``datetime | None`` to ``datetime`` for mypy
    # and turns a would-be response ValidationError into a descriptive 500
    # if the candidate filter ever changes. Checked first so no derivation
    # work is done for a row that cannot be rendered.
    tail_completed_at = tail.completed_at
    if tail_completed_at is None:  # pragma: no cover — discovery repo guarantees terminal tail
        raise _err(
            500,
            "INTERNAL_ERROR",
            f"chain tail {tail.id} has no completed_at",
            True,
        )

    # Anything other than the two known directions falls back to "maximize".
    raw_direction = anchor.objective.get("direction", "maximize")
    direction = raw_direction if raw_direction in ("maximize", "minimize") else "maximize"

    stop_reason = derive_chain_stop_reason(traversal.links, traversal.anchor_trials)
    cumulative_lift = compute_cumulative_lift(traversal.links, traversal.anchor_trials)

    # ``select_best_link`` returns ``None`` when no link carries a
    # ``best_metric`` (e.g. a chain whose only terminal status is
    # "failed"); ``best_metric`` and the proposal id are then ``None`` and
    # the card renders the stop-reason phrase instead (AC-11).
    best_link_id = select_best_link(traversal.links)
    best_metric = next((lk.best_metric for lk in traversal.links if lk.id == best_link_id), None)
    best_link_proposal_id = (
        traversal.proposal_id_by_link_id.get(best_link_id) if best_link_id else None
    )

    return RecentChainSummary(
        anchor_study_id=anchor.id,
        anchor_name=anchor.name,
        chain_length=len(traversal.links),
        best_metric=best_metric,
        objective_metric=str(anchor.objective.get("metric", "")),
        cumulative_lift=cumulative_lift,
        direction=direction,
        stop_reason=stop_reason,
        best_link_proposal_id=best_link_proposal_id,
        tail_completed_at=tail_completed_at,
    )
+ + Malformed ``since`` / out-of-range ``limit`` flow through the + global ``validation_exception_handler`` and return the canonical + 422 ``VALIDATION_ERROR`` envelope (no manual parse path). + + Pagination: inert. ``next_cursor=null`` and ``has_more=false`` + always — OQ-2 resolved limit-cap-only for v1. Keyset pagination + deferred to a separate ``chore_`` idea filed against the spec's + open questions. + """ + chains = await repo.list_recent_completed_chains(db, since=since, limit=limit) + rows = [_recent_chain_row(c) for c in chains] + response.headers["X-Total-Count"] = str(len(rows)) + return RecentChainsResponse(data=rows, next_cursor=None, has_more=False) + + # --------------------------------------------------------------------------- # GET /api/v1/studies/{id} # --------------------------------------------------------------------------- diff --git a/backend/app/db/repo/__init__.py b/backend/app/db/repo/__init__.py index f8cefcf1..be430992 100644 --- a/backend/app/db/repo/__init__.py +++ b/backend/app/db/repo/__init__.py @@ -132,6 +132,7 @@ hard_delete_study, list_children_of_study, list_queued_study_ids, + list_recent_completed_chains, list_running_study_ids, list_studies, ) @@ -201,6 +202,9 @@ # overnight-chain summary (FR-3). "ChainTraversalResult", "get_chain_for_study", + # feat_overnight_studies_summary_card Story 1.1 — recent-completed-chains + # discovery feeding the "Ran while you were away" card on /studies (FR-1). 
async def list_recent_completed_chains(
    db: AsyncSession,
    *,
    since: datetime | None = None,
    limit: int = 20,
) -> list[ChainTraversalResult]:
    """Return de-duplicated, finished overnight chains (length >= 2).

    One :class:`ChainTraversalResult` per chain anchor, in the order the
    chains are first reached while walking terminal follow-up studies
    from newest ``completed_at`` to oldest, capped at ``limit`` chains.

    How it works (FR-1):

    1. Query candidate study ids: rows that are follow-up children
       (``parent_study_id IS NOT NULL``), have a ``completed_at``, and
       carry a terminal status (``completed`` / ``cancelled`` /
       ``failed``); when ``since`` is given, only rows with
       ``completed_at >= since``. Ordered by ``completed_at DESC, id
       DESC`` and capped at ``limit * _CHAIN_MAX_DESCENDANTS`` rows so
       that anchor de-duplication can still fill ``limit`` chains when
       every chain contributes the maximum number of candidates.
    2. For each candidate not already covered by an earlier traversal,
       resolve its chain with :func:`get_chain_for_study`.
    3. Drop the candidate when the traversal is ``None`` (a member was
       hard-deleted between the two queries), when the anchor has
       already been collected, when the chain has fewer than two links,
       or when ``derive_chain_stop_reason`` reports ``"in_flight"`` (some
       link is still running).
    4. Stop as soon as ``limit`` chains have been collected.

    Args:
        db: Async session used for the candidate query and traversals.
        since: Optional inclusive lower bound on candidate ``completed_at``.
        limit: Maximum number of distinct chains to return.

    Returns:
        The collected traversals, in first-hit (newest candidate first)
        order.
    """
    max_candidates = max(1, limit * _CHAIN_MAX_DESCENDANTS)

    # Gather every WHERE criterion first so the statement is built in one
    # pass; the criteria are AND-ed together exactly as chained ``where``
    # calls would be.
    criteria = [
        Study.parent_study_id.is_not(None),
        Study.completed_at.is_not(None),
        Study.status.in_(_TERMINAL_STUDY_STATUSES),
    ]
    if since is not None:
        criteria.append(Study.completed_at >= since)

    query = (
        select(Study.id)
        .where(*criteria)
        .order_by(Study.completed_at.desc(), Study.id.desc())
        .limit(max_candidates)
    )
    result = await db.execute(query)
    newest_first_ids: list[str] = list(result.scalars().all())

    # ``collected`` is insertion-ordered, so the first hit per anchor fixes
    # that chain's position in the output. ``visited`` holds every member
    # of every chain already walked, which lets later candidates from the
    # same chain skip a redundant traversal.
    collected: dict[str, ChainTraversalResult] = {}
    visited: set[str] = set()

    for study_id in newest_first_ids:
        if len(collected) >= limit:
            break
        if study_id in visited:
            continue

        chain = await get_chain_for_study(db, study_id)
        if chain is None:
            # Chain member vanished between the candidate query and the
            # traversal (e.g. test teardown) — skip quietly.
            continue

        # Record the members before any exclusion check so siblings of a
        # rejected chain are skipped too.
        visited.update(member.id for member in chain.links)

        already_collected = chain.anchor_id in collected
        if already_collected or len(chain.links) < 2:
            continue
        if derive_chain_stop_reason(chain.links, chain.anchor_trials) == "in_flight":
            # A link is still running, so the chain is not finished.
            continue

        collected[chain.anchor_id] = chain

    return list(collected.values())
+ +Pure-contract layer (no DB / Redis / engine): asserts the response-model +shapes (top-level keys + row's 10-field set), the ``stop_reason`` enum +matches ``CHAIN_STOP_REASONS``, the ``direction`` enum is two-valued, the +endpoint's presence in the OpenAPI document, the canonical 422 +``VALIDATION_ERROR`` envelope shape for a malformed ``since`` (AC-6), and +the ``X-Total-Count`` header is emitted on the happy path. +""" + +from __future__ import annotations + +import typing + +import httpx +import pytest +from asgi_lifespan import LifespanManager + +from backend.app.api.v1.schemas import RecentChainsResponse, RecentChainSummary +from backend.app.domain.study.chain_summary import CHAIN_STOP_REASONS, ChainStopReason + + +def test_recent_chains_response_top_level_keys() -> None: + assert set(RecentChainsResponse.model_fields) == { + "data", + "next_cursor", + "has_more", + } + + +def test_recent_chain_summary_ten_fields() -> None: + assert set(RecentChainSummary.model_fields) == { + "anchor_study_id", + "anchor_name", + "chain_length", + "best_metric", + "objective_metric", + "cumulative_lift", + "direction", + "stop_reason", + "best_link_proposal_id", + "tail_completed_at", + } + assert len(RecentChainSummary.model_fields) == 10 + + +def test_recent_chain_summary_stop_reason_literal_matches_frozenset() -> None: + annotation = RecentChainSummary.model_fields["stop_reason"].annotation + literal_values = set(typing.get_args(annotation)) + assert literal_values == set(CHAIN_STOP_REASONS) + assert literal_values == { + "depth_exhausted", + "no_lift", + "budget", + "parent_failed", + "cancelled", + "in_flight", + } + + +def test_recent_chain_summary_direction_literal_values() -> None: + annotation = RecentChainSummary.model_fields["direction"].annotation + assert set(typing.get_args(annotation)) == {"maximize", "minimize"} + + +def test_stop_reason_literal_export_unchanged() -> None: + """Sanity: the ``ChainStopReason`` Literal exported from + chain_summary.py mirrors 
@pytest.mark.asyncio
async def test_x_total_count_header_emitted() -> None:
    """The endpoint emits ``X-Total-Count == len(data)`` on the happy path.

    With no chains seeded the count is 0 — same contract. When the
    backing services are unavailable (5xx) the test is skipped explicitly
    rather than passing without asserting anything; any other non-200
    status is a failure.
    """
    from backend.app.main import app

    async with LifespanManager(app):
        async with httpx.AsyncClient(
            transport=httpx.ASGITransport(app=app), base_url="http://test"
        ) as client:
            resp = await client.get("/api/v1/studies/chains/recent")
            if resp.status_code >= 500:
                # DB down: nothing meaningful to assert, but make that visible
                # in the report instead of recording a vacuous pass.
                pytest.skip(f"endpoint unavailable (status {resp.status_code})")
            assert resp.status_code == 200
            assert "X-Total-Count" in resp.headers
            # The header MUST be string-encoded digits and equal the row count.
            total = resp.headers["X-Total-Count"]
            assert total.isdigit()
            assert int(total) == len(resp.json()["data"])
@pytest.mark.asyncio
async def test_limit_out_of_range_returns_422_validation_error() -> None:
    """AC-6 extension: ``?limit=`` outside [1, 50] produces the canonical
    422 ``VALIDATION_ERROR`` envelope, below and above the allowed range.
    """
    from backend.app.main import app

    async with LifespanManager(app):
        async with httpx.AsyncClient(
            transport=httpx.ASGITransport(app=app), base_url="http://test"
        ) as client:
            # 0 and 51 sit just outside the ``ge=1`` / ``le=50`` bounds; 999
            # is the original far-out-of-range probe.
            for bad_limit in (0, 51, 999):
                resp = await client.get(
                    "/api/v1/studies/chains/recent", params={"limit": bad_limit}
                )
                assert resp.status_code == 422, f"limit={bad_limit}"
                detail = resp.json()["detail"]
                assert detail["error_code"] == "VALIDATION_ERROR", f"limit={bad_limit}"
                assert detail["retryable"] is False, f"limit={bad_limit}"
+ +Exercises the endpoint end-to-end through the real FastAPI app + DB: + +* AC-1 — multi-link chain appears in the response with the right shape +* AC-5 — empty data array → 200 with ``data:[]`` +* AC-11 — terminal-failed chain returns with null metric fields +* Route-order collision regression — ``/studies/chains/recent`` hits the + new handler, not ``get_study_detail`` with ``study_id="chains"`` +""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime, timedelta +from typing import Any + +import httpx +import pytest + +from backend.app.db import repo +from backend.app.db.session import get_session_factory +from backend.tests.conftest import postgres_reachable + +pytestmark = [ + pytest.mark.integration, + pytest.mark.skipif( + not postgres_reachable(), + reason="Postgres not reachable — see docs/03_runbooks/local-dev.md", + ), +] + +_BASE = datetime(2026, 5, 31, tzinfo=UTC) + + +def _uuid() -> str: + return str(uuid.uuid4()) + + +async def _seed_fixtures(db: Any) -> dict[str, str]: + cluster = await repo.create_cluster( + db, + id=_uuid(), + name=f"rc-c-{uuid.uuid4().hex[:8]}", + engine_type="elasticsearch", + environment="dev", + base_url="http://stub:9200", + auth_kind="es_basic", + credentials_ref="ref", + ) + template = await repo.create_query_template( + db, + id=_uuid(), + name=f"rc-qt-{uuid.uuid4().hex[:8]}", + engine_type="elasticsearch", + body="{}", + declared_params={}, + ) + query_set = await repo.create_query_set( + db, id=_uuid(), name=f"rc-qs-{uuid.uuid4().hex[:8]}", cluster_id=cluster.id + ) + jl = await repo.create_judgment_list( + db, + id=_uuid(), + name=f"rc-jl-{uuid.uuid4().hex[:8]}", + query_set_id=query_set.id, + cluster_id=cluster.id, + target="products", + rubric="rate", + status="complete", + ) + return { + "cluster_id": cluster.id, + "template_id": template.id, + "query_set_id": query_set.id, + "judgment_list_id": jl.id, + } + + +async def _seed_study( + db: Any, + fx: dict[str, str], + *, + 
parent_study_id: str | None = None, + status: str = "completed", + best_metric: float | None = None, + baseline_metric: float | None = None, + created_at: datetime | None = None, + completed_at: datetime | None = None, + name: str | None = None, + config: dict[str, Any] | None = None, +) -> str: + sid = _uuid() + await repo.create_study( + db, + id=sid, + name=name if name is not None else f"rc-study-{sid[:8]}", + cluster_id=fx["cluster_id"], + target="products", + template_id=fx["template_id"], + query_set_id=fx["query_set_id"], + judgment_list_id=fx["judgment_list_id"], + search_space={}, + objective={"metric": "ndcg", "k": 10, "direction": "maximize"}, + config=config if config is not None else {}, + status=status, + optuna_study_name=sid, + parent_study_id=parent_study_id, + best_metric=best_metric, + baseline_metric=baseline_metric, + created_at=created_at if created_at is not None else _BASE, + completed_at=completed_at, + ) + return sid + + +async def test_recent_chains_returns_chain_ac1(async_client: httpx.AsyncClient) -> None: + """AC-1: a 3-link chain shows up exactly once with anchor identity, + chain length, best-link metric, derived stop reason, and a non-null + ``tail_completed_at``. 
+ """ + factory = get_session_factory() + async with factory() as db: + fx = await _seed_fixtures(db) + anchor = await _seed_study( + db, + fx, + name="Anchor study", + best_metric=0.65, + baseline_metric=0.60, + created_at=_BASE, + completed_at=_BASE + timedelta(minutes=5), + ) + mid = await _seed_study( + db, + fx, + parent_study_id=anchor, + best_metric=0.72, + created_at=_BASE + timedelta(hours=1), + completed_at=_BASE + timedelta(hours=1, minutes=5), + ) + await _seed_study( + db, + fx, + parent_study_id=mid, + best_metric=0.74, + created_at=_BASE + timedelta(hours=2), + completed_at=_BASE + timedelta(hours=2, minutes=5), + config={"auto_followup_depth": 0}, + ) + await db.commit() + + resp = await async_client.get("/api/v1/studies/chains/recent") + assert resp.status_code == 200 + assert resp.headers.get("X-Total-Count") == "1" + body = resp.json() + assert body["next_cursor"] is None + assert body["has_more"] is False + assert len(body["data"]) == 1 + row = body["data"][0] + assert row["anchor_study_id"] == anchor + assert row["anchor_name"] == "Anchor study" + assert row["chain_length"] == 3 + assert row["best_metric"] == pytest.approx(0.74) + assert row["objective_metric"] == "ndcg" + assert row["cumulative_lift"] == pytest.approx(0.14) + assert row["direction"] == "maximize" + assert row["stop_reason"] == "depth_exhausted" + assert row["best_link_proposal_id"] is None + # tail completed_at is the LAST link's completed_at, surfaced as ISO. + assert row["tail_completed_at"].startswith("2026-05-31T02:05") + + +async def test_recent_chains_empty_returns_200_ac5(async_client: httpx.AsyncClient) -> None: + """AC-5: with no chains in the DB the endpoint returns 200 with an + empty data array and ``X-Total-Count: 0``. 
async def test_recent_chains_failed_tail_null_metric_ac11(async_client: httpx.AsyncClient) -> None:
    """AC-11: a chain whose terminal tail is ``failed`` (no best_metric)
    is still returned, and its stop reason resolves to ``parent_failed``
    so the card can render the failure phrase.

    In this fixture the anchor itself has a non-null ``best_metric``, so
    the row's ``best_metric`` and ``cumulative_lift`` are NOT null: they
    come from the anchor (0.6, and 0.6 − 0.5 baseline = 0.1).
    """
    factory = get_session_factory()
    async with factory() as db:
        fx = await _seed_fixtures(db)
        # Completed anchor with a metric and a baseline.
        anchor = await _seed_study(
            db,
            fx,
            best_metric=0.6,
            baseline_metric=0.5,
            created_at=_BASE,
            completed_at=_BASE + timedelta(minutes=5),
        )
        # Failed follow-up child: terminal, but carries no metric.
        await _seed_study(
            db,
            fx,
            parent_study_id=anchor,
            status="failed",
            best_metric=None,
            created_at=_BASE + timedelta(hours=1),
            completed_at=_BASE + timedelta(hours=1, minutes=5),
        )
        await db.commit()

    resp = await async_client.get("/api/v1/studies/chains/recent")
    assert resp.status_code == 200
    body = resp.json()
    assert len(body["data"]) == 1
    row = body["data"][0]
    assert row["anchor_study_id"] == anchor
    assert row["chain_length"] == 2
    # The anchor's best_metric is non-null so the "best of completed
    # subset" picks the anchor — `best_metric` is the anchor's value,
    # `cumulative_lift` is anchor − baseline.
    assert row["best_metric"] == pytest.approx(0.6)
    assert row["cumulative_lift"] == pytest.approx(0.1)
    assert row["stop_reason"] == "parent_failed"
The former returns 200 + the documented body + shape; the latter would 404 with ``STUDY_NOT_FOUND``. + """ + resp = await async_client.get("/api/v1/studies/chains/recent") + assert resp.status_code == 200 + body = resp.json() + # The recent-chains shape has a `data` key; STUDY_NOT_FOUND would + # carry a top-level `detail.error_code` envelope. + assert "data" in body + assert "detail" not in body diff --git a/backend/tests/integration/test_studies_chain_recent_repo.py b/backend/tests/integration/test_studies_chain_recent_repo.py new file mode 100644 index 00000000..98348a8c --- /dev/null +++ b/backend/tests/integration/test_studies_chain_recent_repo.py @@ -0,0 +1,485 @@ +# SPDX-FileCopyrightText: 2026 soundminds.ai +# +# SPDX-License-Identifier: Apache-2.0 + +"""Integration tests for ``list_recent_completed_chains`` +(feat_overnight_studies_summary_card Story 1.1). + +Exercises the recent-completed-chains discovery repo helper against the +real test Postgres. Covers the spec ACs the repo layer owns: + +* AC-12 — multi-link chain returned exactly once (anchor dedup) +* AC-2 — single-study (length 1) excluded +* AC-3 — ``since`` boundary filter +* AC-4 — chain with an in-flight interior link excluded +* AC-11 — terminal-failed chain returned with its derived shape intact +* Plus the concurrent-delete safety net: a candidate whose chain is + hard-deleted between the candidate query and the traversal is skipped, + never raised. 
+""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime, timedelta +from typing import Any + +import pytest +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from backend.app.db import repo + +_BASE = datetime(2026, 5, 31, tzinfo=UTC) + + +def _uuid() -> str: + return str(uuid.uuid4()) + + +async def _seed_cluster(db: AsyncSession) -> str: + cluster = await repo.create_cluster( + db, + id=_uuid(), + name=f"c-{_uuid()[:8]}", + engine_type="elasticsearch", + environment="dev", + base_url="http://x:9200", + auth_kind="es_basic", + credentials_ref="ref", + ) + return cluster.id + + +async def _seed_template(db: AsyncSession) -> str: + template = await repo.create_query_template( + db, + id=_uuid(), + name=f"qt-{_uuid()[:8]}", + engine_type="elasticsearch", + body="{}", + declared_params={}, + ) + return template.id + + +class _Fixtures: + def __init__(self, cluster_id: str, template_id: str, query_set_id: str, jl_id: str) -> None: + self.cluster_id = cluster_id + self.template_id = template_id + self.query_set_id = query_set_id + self.judgment_list_id = jl_id + + +async def _seed_fixtures(db: AsyncSession) -> _Fixtures: + cluster_id = await _seed_cluster(db) + template_id = await _seed_template(db) + query_set = await repo.create_query_set( + db, id=_uuid(), name=f"qs-{_uuid()[:8]}", cluster_id=cluster_id + ) + jl = await repo.create_judgment_list( + db, + id=_uuid(), + name=f"jl-{_uuid()[:8]}", + query_set_id=query_set.id, + cluster_id=cluster_id, + target="products", + rubric="rate", + status="complete", + ) + return _Fixtures(cluster_id, template_id, query_set.id, jl.id) + + +async def _seed_study( + db: AsyncSession, + *, + cluster_id: str, + template_id: str, + query_set_id: str, + judgment_list_id: str, + parent_study_id: str | None = None, + status: str = "completed", + best_metric: float | None = None, + baseline_metric: float | None = None, + created_at: datetime | None = None, + 
completed_at: datetime | None = None, + config: dict[str, Any] | None = None, +) -> str: + sid = _uuid() + await repo.create_study( + db, + id=sid, + name=f"study-{sid[:8]}", + cluster_id=cluster_id, + target="products", + template_id=template_id, + query_set_id=query_set_id, + judgment_list_id=judgment_list_id, + search_space={}, + objective={"metric": "ndcg", "k": 10, "direction": "maximize"}, + config=config if config is not None else {}, + status=status, + optuna_study_name=sid, + parent_study_id=parent_study_id, + best_metric=best_metric, + baseline_metric=baseline_metric, + created_at=created_at if created_at is not None else _BASE, + completed_at=completed_at, + ) + return sid + + +@pytest.mark.integration +class TestListRecentCompletedChains: + async def test_no_chains_returns_empty(self, db_session: AsyncSession) -> None: + result = await repo.list_recent_completed_chains(db_session, since=None, limit=20) + assert result == [] + + async def test_three_link_chain_returned_once_with_correct_anchor( + self, db_session: AsyncSession + ) -> None: + """AC-12: dedup — a 3-link chain shows up as exactly ONE row keyed + on the anchor, even though candidates B and C both qualify. 
+ """ + fx = await _seed_fixtures(db_session) + anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.65, + baseline_metric=0.60, + created_at=_BASE, + completed_at=_BASE + timedelta(minutes=5), + ) + mid = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=anchor, + best_metric=0.72, + created_at=_BASE + timedelta(hours=1), + completed_at=_BASE + timedelta(hours=1, minutes=5), + ) + tail = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=mid, + best_metric=0.74, + created_at=_BASE + timedelta(hours=2), + completed_at=_BASE + timedelta(hours=2, minutes=5), + ) + await db_session.commit() + + result = await repo.list_recent_completed_chains(db_session, since=None, limit=20) + + assert len(result) == 1 + traversal = result[0] + assert traversal.anchor_id == anchor + assert [s.id for s in traversal.links] == [anchor, mid, tail] + + async def test_single_study_excluded(self, db_session: AsyncSession) -> None: + """AC-2: a study with no parent (chain length 1) must NOT appear.""" + fx = await _seed_fixtures(db_session) + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.7, + baseline_metric=0.6, + completed_at=_BASE + timedelta(minutes=5), + ) + await db_session.commit() + + result = await repo.list_recent_completed_chains(db_session, since=None, limit=20) + assert result == [] + + async def test_since_boundary_excludes_older_chain(self, db_session: AsyncSession) -> None: + """AC-3: ``since`` filters chains whose terminal members completed + before the 
cutoff. Inclusive at the cutoff (``completed_at >= since``). + """ + fx = await _seed_fixtures(db_session) + old_anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.6, + baseline_metric=0.5, + created_at=_BASE, + completed_at=_BASE + timedelta(minutes=5), + ) + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=old_anchor, + best_metric=0.65, + created_at=_BASE + timedelta(hours=1), + completed_at=_BASE + timedelta(hours=1), # before `since` cutoff + ) + new_anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.7, + baseline_metric=0.6, + created_at=_BASE + timedelta(hours=10), + completed_at=_BASE + timedelta(hours=10, minutes=5), + ) + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=new_anchor, + best_metric=0.74, + created_at=_BASE + timedelta(hours=11), + completed_at=_BASE + timedelta(hours=11, minutes=5), + ) + await db_session.commit() + + cutoff = _BASE + timedelta(hours=5) + result = await repo.list_recent_completed_chains(db_session, since=cutoff, limit=20) + + assert len(result) == 1 + assert result[0].anchor_id == new_anchor + + async def test_in_flight_chain_excluded(self, db_session: AsyncSession) -> None: + """AC-4: a chain whose tail link is still running must NOT + appear. The candidate query never selects a non-terminal tail, but + its completed parent still qualifies as a candidate; this test + exercises the defensive in-flight skip on the resolved traversal. 
+ """ + fx = await _seed_fixtures(db_session) + anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.6, + baseline_metric=0.5, + created_at=_BASE, + completed_at=_BASE + timedelta(minutes=5), + ) + # Mid link terminated and qualifies as a candidate via its + # completed_at + parent_study_id IS NOT NULL. Tail is still running + # → derive_chain_stop_reason returns "in_flight" → excluded. + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=anchor, + best_metric=0.7, + created_at=_BASE + timedelta(hours=1), + completed_at=_BASE + timedelta(hours=1, minutes=5), + ) + mid = ( + await db_session.execute( + text("SELECT id FROM studies WHERE parent_study_id = :a"), {"a": anchor} + ) + ).scalar_one() + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=mid, + status="running", + best_metric=None, + created_at=_BASE + timedelta(hours=2), + completed_at=None, + ) + await db_session.commit() + + result = await repo.list_recent_completed_chains(db_session, since=None, limit=20) + assert result == [] + + async def test_terminal_failed_chain_returned(self, db_session: AsyncSession) -> None: + """AC-11 data path: a chain whose tail is terminal-failed (parent + followup failed → chain terminated) is included in the result + with its derived shape intact (failed tail, no best_metric). 
+ """ + fx = await _seed_fixtures(db_session) + anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.6, + baseline_metric=0.5, + created_at=_BASE, + completed_at=_BASE + timedelta(minutes=5), + ) + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=anchor, + status="failed", + best_metric=None, + created_at=_BASE + timedelta(hours=1), + completed_at=_BASE + timedelta(hours=1, minutes=5), + ) + await db_session.commit() + + result = await repo.list_recent_completed_chains(db_session, since=None, limit=20) + + assert len(result) == 1 + traversal = result[0] + assert traversal.anchor_id == anchor + assert len(traversal.links) == 2 + # Tail status drives the downstream stop_reason → "parent_failed". + assert traversal.links[-1].status == "failed" + assert traversal.links[-1].best_metric is None + + async def test_concurrent_anchor_delete_skipped( + self, + db_session: AsyncSession, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Concurrent-delete safety net — a candidate whose chain becomes + unresolvable between the candidate query and the traversal call + (``get_chain_for_study`` returns ``None``) must be skipped + silently, never raised. Mirrors the chain-panel defensive skip + at ``study.py:327-333``. + + We can't reproduce the orphan-anchor scenario directly in + Postgres because the ``studies.parent_study_id`` self-FK blocks + deleting an anchor that still has children. So we patch + ``get_chain_for_study`` to return ``None`` for one specific id + and assert the helper skips that candidate and still returns the + surviving chain. + """ + from backend.app.db.repo import study as study_repo + + fx = await _seed_fixtures(db_session) + # Surviving chain we expect to see in the result. 
+ survive_anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.7, + baseline_metric=0.6, + created_at=_BASE, + completed_at=_BASE + timedelta(minutes=5), + ) + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=survive_anchor, + best_metric=0.75, + created_at=_BASE + timedelta(hours=1), + completed_at=_BASE + timedelta(hours=1, minutes=5), + ) + # Disposable chain whose tail we'll force the traversal to fail + # for (race-window stand-in). Tail completed AFTER the surviving + # chain so it sorts first in the candidate query, exercising the + # skip BEFORE the surviving candidate is processed. + doomed_anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.5, + baseline_metric=0.4, + created_at=_BASE + timedelta(hours=2), + completed_at=_BASE + timedelta(hours=2, minutes=5), + ) + doomed_child_id = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=doomed_anchor, + best_metric=0.55, + created_at=_BASE + timedelta(hours=3), + completed_at=_BASE + timedelta(hours=3, minutes=5), + ) + await db_session.commit() + + # Patch get_chain_for_study to simulate the concurrent-delete race + # for the doomed child specifically. 
+ real_get_chain = study_repo.get_chain_for_study + + async def patched_get_chain( + db: AsyncSession, study_id: str + ) -> study_repo.ChainTraversalResult | None: + if study_id == doomed_child_id: + return None + return await real_get_chain(db, study_id) + + monkeypatch.setattr(study_repo, "get_chain_for_study", patched_get_chain) + + result = await repo.list_recent_completed_chains(db_session, since=None, limit=20) + + # The doomed candidate is silently skipped; the surviving chain + # remains. + assert len(result) == 1 + assert result[0].anchor_id == survive_anchor + + async def test_limit_caps_distinct_chains(self, db_session: AsyncSession) -> None: + """Sanity: ``limit`` actually caps the number of distinct chains + returned, AND the candidate scan-cap (``limit * 5``) is generous + enough to fill a small limit even when every chain is minimum-length. + """ + fx = await _seed_fixtures(db_session) + anchors: list[str] = [] + for i in range(5): + anchor = await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + best_metric=0.6, + baseline_metric=0.5, + created_at=_BASE + timedelta(hours=10 * i), + completed_at=_BASE + timedelta(hours=10 * i, minutes=5), + ) + await _seed_study( + db_session, + cluster_id=fx.cluster_id, + template_id=fx.template_id, + query_set_id=fx.query_set_id, + judgment_list_id=fx.judgment_list_id, + parent_study_id=anchor, + best_metric=0.7, + created_at=_BASE + timedelta(hours=10 * i + 1), + completed_at=_BASE + timedelta(hours=10 * i + 1, minutes=5), + ) + anchors.append(anchor) + await db_session.commit() + + result = await repo.list_recent_completed_chains(db_session, since=None, limit=3) + assert len(result) == 3 + # Newest-first by tail completion → anchors[4], [3], [2]. 
+ assert [t.anchor_id for t in result] == [anchors[4], anchors[3], anchors[2]] diff --git a/docs/00_overview/BACKLOG_DASHBOARD.md b/docs/00_overview/BACKLOG_DASHBOARD.md index 4db02d84..deef4091 100644 --- a/docs/00_overview/BACKLOG_DASHBOARD.md +++ b/docs/00_overview/BACKLOG_DASHBOARD.md @@ -20,15 +20,15 @@ Implementation in progress — resume to finish | Metric | Value | |---|---| -| Filed under BACKLOG | **3** folders total (done + specced not-done + idea backlog + bugs) | +| Filed under BACKLOG | **5** folders total (done + specced not-done + idea backlog + bugs) | | Specced features done | **0 / 1** (0%) — of features *past the idea stage* (those with a spec); the idea backlog below is NOT in this denominator, so 100% ≠ release complete | -| Pending work | **3** items (every not-done feat/infra/chore/bug across all priorities) | +| Pending work | **5** items (every not-done feat/infra/chore/bug across all priorities) | | → P0 — do next | **0** unblocking / paying daily cost | | → P1 | **0** high-value, ready when P0 clears | | → P2 (default) | 2 important to file, not blocking | -| → Backlog | 1 captured for record, not planned | +| → Backlog | 3 captured for record, not planned | | Open bugs | 1 | -| Legacy "Path to BACKLOG" | 3 items — scoped-not-done + bugs + chore-ideas only (excludes feat/infra ideas) | +| Legacy "Path to BACKLOG" | 5 items — scoped-not-done + bugs + chore-ideas only (excludes feat/infra ideas) | | Backlog ideas | 0 idea-only feat/infra (not yet scoped into BACKLOG) | | In flight | 1 feature(s) actively shipping | @@ -52,12 +52,14 @@ _None._ _None._ -### Idea (2) +### Idea (4) | # | Priority | Feature | Type | One-liner | Depends on | Status | |---|---|---|---|---|---|---| | 1 | P2 | [bug_starlette_request_poisons_fastapi_depends_tests](planned_features/99_backlog/bug_starlette_request_poisons_fastapi_depends_tests/idea.md) | Bug | There is shared state somewhere in starlette / FastAPI that is mutated by `Request(scope={"type": "http", 
...})` and breaks subsequent `Depends` resolution. Possible suspects: | — | Idea — bug captured during feat_index_document_browser Story 2.1 | | 2 | Backlog | [chore_demo_reseed_stale_recovery_atomic_cas](planned_features/99_backlog/chore_demo_reseed_stale_recovery_atomic_cas/idea.md) | Chore | PR #299 added stale-status auto-recovery to the demo-reseed POST handler ([`_test.py`](../backend/app/api/v1/_test.py)): when the Redis status is `running` but `started_at` is older than `DEMO_RESEED_ | — | Idea — captured during PR #299 GPT-5.5 final review (finding #2, adjudicated non-regression) | +| 3 | Backlog | [chore_studies_chain_recent_indexes](planned_features/99_backlog/chore_studies_chain_recent_indexes/idea.md) | Chore | `list_recent_completed_chains` at [`backend/app/db/repo/study.py`](planned_features/implemented_features/2026_06_04_feat_overnight_studies_summary_card) executes a single SQL query of the shape: | — | Idea — deferred follow-on from `feat_overnight_studies_summary_card` Story 1.1 | +| 4 | Backlog | [chore_studies_chain_recent_keyset_pagination](planned_features/99_backlog/chore_studies_chain_recent_keyset_pagination/idea.md) | Chore | `GET /api/v1/studies/chains/recent` ships with a fixed `limit` ceiling of 50 and inert pagination fields kept on the wire for forward compatibility — `next_cursor` is always `null`, `has_more` is alwa | — | Idea — deferred follow-on from `feat_overnight_studies_summary_card` Story 1.2 | ## Dependency graph diff --git a/docs/00_overview/MVP2_DASHBOARD.md b/docs/00_overview/MVP2_DASHBOARD.md index ddd76daa..69f7704e 100644 --- a/docs/00_overview/MVP2_DASHBOARD.md +++ b/docs/00_overview/MVP2_DASHBOARD.md @@ -20,16 +20,16 @@ Plan approved; run /impl-execute to ship | Metric | Value | |---|---| -| Filed under MVP2 | **49** folders total (done + specced not-done + idea backlog + bugs) | +| Filed under MVP2 | **50** folders total (done + specced not-done + idea backlog + bugs) | | Specced features done | **16 / 26** (62%) 
— of features *past the idea stage* (those with a spec); the idea backlog below is NOT in this denominator, so 100% ≠ release complete | -| Pending work | **31** items (every not-done feat/infra/chore/bug across all priorities) | +| Pending work | **32** items (every not-done feat/infra/chore/bug across all priorities) | | → P0 — do next | **0** unblocking / paying daily cost | | → P1 | **0** high-value, ready when P0 clears | -| → P2 (default) | 26 important to file, not blocking | +| → P2 (default) | 27 important to file, not blocking | | → Backlog | 5 captured for record, not planned | | Open bugs | 9 | | Legacy "Path to MVP2" | 26 items — scoped-not-done + bugs + chore-ideas only (excludes feat/infra ideas) | -| Backlog ideas | 5 idea-only feat/infra (not yet scoped into MVP2) | +| Backlog ideas | 6 idea-only feat/infra (not yet scoped into MVP2) | | In flight | 0 feature(s) actively shipping | ## Pipeline @@ -82,29 +82,30 @@ _None._ _None._ -### Idea (19) +### Idea (20) | # | Priority | Feature | Type | One-liner | Depends on | Status | |---|---|---|---|---|---|---| | 1 | P2 | [feat_proposal_full_param_space_view](planned_features/02_mvp2/feat_proposal_full_param_space_view/idea.md) | Feature | The proposal detail page surfaces `config_diff` — the subset of parameters the study **tuned** — and the winning values for them. Today's example proposal carries `{boost: {from: 1.0, to: 2.5}}` and r | — | Idea — user request during the same session as `feat_overnight_final_solution` | -| 2 | P2 | [infra_smoke_fork_pr_secret_skip](planned_features/02_mvp2/infra_smoke_fork_pr_secret_skip/idea.md) | Infra | `.github/workflows/pr.yml` triggers on `pull_request:` ([pr.yml:43](../.github/workflows/pr.yml)) — **not** `pull_request_target`. 
GitHub deliberately withholds repository secrets from workflows trigg | — | Idea — tangential discovery while merging PR #387 (`chore_arq_pool_aclose_deprecation`) | -| 3 | P2 | [chore_demo_reseed_partial_completion_fast_test](planned_features/02_mvp2/chore_demo_reseed_partial_completion_fast_test/idea.md) | Chore | `infra_solr_ci_readiness` made the demo reseed engine-tolerant: when an engine is unreachable, its scenario is skipped, the reseed completes with `status="complete"` and a non-empty `scenarios_skipped | — | Idea — tangential discovery during `infra_solr_ci_readiness` Story 1.2 implementation | -| 4 | P2 | [chore_e2e_overnight_strategy_radix_select_timing](planned_features/02_mvp2/chore_e2e_overnight_strategy_radix_select_timing/idea.md) | Chore | The Story 3.2 E2E spec walks the create-study wizard to Step 5, clicks the depth `` becomes visible. In chromium against `pnpm dev`, t | — | Idea — tangential follow-up captured during `feat_overnight_final_solution` Story 3.2 implementation | -| 5 | P2 | [chore_overnight_result_card_screenshot](planned_features/02_mvp2/chore_overnight_result_card_screenshot/idea.md) | Chore | The `docs/08_guides/tutorial-first-study.md` Step 12 sub-section *"In the morning — read the overnight result card"* shipped on PR #442 with prose only — no… | — | Idea — deferred FR-9 deliverable from PR #442 | -| 6 | P2 | [chore_pr_yml_parallelize_backend_job](planned_features/02_mvp2/chore_pr_yml_parallelize_backend_job/idea.md) | Chore | `.github/workflows/pr.yml` has a job named `backend (lint + typecheck + tests + coverage)` that runs four sequential things in one job: ruff/lint, mypy, the full pytest matrix (unit + integration + co | — | Idea — captured during PR #426 CI watch | -| 7 | P2 | [chore_solr_post_pipeline_followups](planned_features/02_mvp2/chore_solr_post_pipeline_followups/idea.md) | Chore | The 13-story `infra_adapter_solr` execution surfaced several follow-on items that fit neither the original spec nor any sister 
feature folder. None block the MVP2 Solr release — they're operator-exper | — | Idea — tangential observations from `infra_adapter_solr` end-to-end | -| 8 | P2 | [chore_ubi_hybrid_template_render](planned_features/02_mvp2/chore_ubi_hybrid_template_render/idea.md) | Chore | Idea — contract decision deferred (NOT a worker bug) | — | Idea — contract decision deferred (NOT a worker bug) | -| 9 | P2 | [bug_e2e_teardown_chain_node_delete_500](planned_features/02_mvp2/bug_e2e_teardown_chain_node_delete_500/idea.md) | Bug | The E2E global-teardown deletes seeded rows in a fixed order (per `chore_e2e_test_rows_isolation` Story 1.2 cleanup registration). For auto-followup **chains**, the seeded nodes are `queued` studies c | — | Idea — tangential discovery during `feat_overnight_autopilot` (Story 4.2 E2E, PR forthcoming) | -| 10 | P2 | [bug_relyloop_spec_ubi_section_drift](planned_features/02_mvp2/bug_relyloop_spec_ubi_section_drift/idea.md) | Bug | [`docs/00_overview/relyloop-spec.md`](relyloop-spec.md) §"Click-derived judgments — OpenSearch UBI as the engine-neutral primary path" (line ~706) carries two staleness bugs from the 2026-05-27 releas | — | Idea — captured during `feat_ubi_judgments` preflight (2026-05-29) | -| 11 | P2 | [bug_reseed_failure_blocks_retry_arq_singleton_dedup](planned_features/02_mvp2/bug_reseed_failure_blocks_retry_arq_singleton_dedup/idea.md) | Bug | `run_demo_reseed` is enqueued with a fixed Arq job id `demo_reseed:singleton` (the singleton concurrency guard). 
When a run reaches a terminal state, Arq stores its **result** under `arq:result:demo_r | — | Idea — tangential discovery while verifying `fix(demo): add Solr (8983) to the reseed engine host-URL mapping` (branch `feat_demo_reseed_solr_and_steplog`) | -| 12 | P2 | [bug_seed_meaningful_demos_silent_bulk_errors](planned_features/02_mvp2/bug_seed_meaningful_demos_silent_bulk_errors/idea.md) | Bug | [`scripts/seed_meaningful_demos.py:917-935`](../../scripts/seed_meaningful_demos.py#L917-L935) bulk-indexes 1000 Amazon ESCI products into a dedicated index per demo scenario: | — | Idea — captured during `bug_smoke_seed_es_unavailable_shards_race` Phase 2.5 tangential sweep | -| 13 | P2 | [bug_studies_detail_vitest_intermittent_timeout](planned_features/02_mvp2/bug_studies_detail_vitest_intermittent_timeout/idea.md) | Bug | Under the full `pnpm test` run (`vitest run`, default worker pool), the Study-detail-page render test sometimes blocks past the 5 s `testTimeout` default — but the test itself is data-driven from mock | — | Idea — captured during `chore_template_library_expansion` post-impl tangential sweep | -| 14 | P2 | [bug_webhook_concurrent_merge_race_timing_sensitive](planned_features/02_mvp2/bug_webhook_concurrent_merge_race_timing_sensitive/idea.md) | Bug | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | — | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | -| 15 | Backlog | [feat_fts_rank_ordering](planned_features/02_mvp2/feat_fts_rank_ordering/idea.md) | Feature | `feat_data_table_primitive` shipped filter-only FTS — `?q=foo` matches rows where `search_vector @@ plainto_tsquery('english', 'foo')` is true but orders results by `created_at DESC, id DESC` (the def | — | Idea — deferred from `feat_data_table_primitive` (MVP1) per spec §16. 
| -| 16 | Backlog | [feat_overnight_final_solution_phase3](planned_features/02_mvp2/feat_overnight_final_solution_phase3/idea.md) | Feature | When `follow_suggestions` runs a 4-link chain, today's proposal-creation logic ([`backend/workers/orchestrator.py`](../backend/workers/orchestrator.py) `_on_study_complete`) creates **one `pending` pr | — | Idea — deferred Phase 3 from `feat_overnight_final_solution` Phase 1 spec | -| 17 | Backlog | [infra_arq_subprocess_test](planned_features/02_mvp2/infra_arq_subprocess_test/idea.md) | Infra | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; | — | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; a subprocess test would add a narrow Arq-version-regression guard. | -| 18 | Backlog | [chore_auto_followup_parent_advisory_lock](planned_features/02_mvp2/chore_auto_followup_parent_advisory_lock/idea.md) | Chore | The shipped `feat_auto_followup_studies` worker uses a two-layer idempotency scheme: | — | Idea — captured as a standalone file to resolve broken cross-references in `feat_auto_followup_studies` D-11 + plan F2 + `bug_auto_followup_completed_parent_stop_chain_race/idea.md`. The slug was coined 2026-05-24 in D-11 but only existed as descriptive prose across other documents until now. | -| 19 | Backlog | [bug_chat_long_conversation_truncation](planned_features/02_mvp2/bug_chat_long_conversation_truncation/idea.md) | Bug | [`backend/app/services/agent_chat.send_user_message`](../../backend/app/services/agent_chat.py) defensively caps the OpenAI history at the most recent `HISTORY_MAX_MESSAGES = 100` messages… | — | Held for MVP2 (decided 2026-05-13). 
Folder renamed with `_mvp2` suffix to make the deferral visible at-a-glance in `ls docs/00_overview/planned_features/`. Resume work when MVP2 starts — no technical dependency on MVP2 infra (audit_log is N/A; Langfuse is convenience only); the deferral is scope discipline + zero current impact (latent bug, no operator has hit the 100-message cap). | +| 2 | P2 | [feat_website_walkthrough_guides](planned_features/02_mvp2/feat_website_walkthrough_guides/idea.md) | Feature | The public website **relyloop.com** is an MkDocs Material site at [`website/`](../../website) (deployed via GitHub Pages by [deploy-docs.yml](../../.github/workflows/deploy-docs.yml), `mkdocs build -- | — | Idea — user request (operator noticed relyloop.com lacks the guides that ship in the running app) | +| 3 | P2 | [infra_smoke_fork_pr_secret_skip](planned_features/02_mvp2/infra_smoke_fork_pr_secret_skip/idea.md) | Infra | `.github/workflows/pr.yml` triggers on `pull_request:` ([pr.yml:43](../.github/workflows/pr.yml)) — **not** `pull_request_target`. GitHub deliberately withholds repository secrets from workflows trigg | — | Idea — tangential discovery while merging PR #387 (`chore_arq_pool_aclose_deprecation`) | +| 4 | P2 | [chore_demo_reseed_partial_completion_fast_test](planned_features/02_mvp2/chore_demo_reseed_partial_completion_fast_test/idea.md) | Chore | `infra_solr_ci_readiness` made the demo reseed engine-tolerant: when an engine is unreachable, its scenario is skipped, the reseed completes with `status="complete"` and a non-empty `scenarios_skipped | — | Idea — tangential discovery during `infra_solr_ci_readiness` Story 1.2 implementation | +| 5 | P2 | [chore_e2e_overnight_strategy_radix_select_timing](planned_features/02_mvp2/chore_e2e_overnight_strategy_radix_select_timing/idea.md) | Chore | The Story 3.2 E2E spec walks the create-study wizard to Step 5, clicks the depth `` becomes visible. 
In chromium against `pnpm dev`, t | — | Idea — tangential follow-up captured during `feat_overnight_final_solution` Story 3.2 implementation | +| 6 | P2 | [chore_overnight_result_card_screenshot](planned_features/02_mvp2/chore_overnight_result_card_screenshot/idea.md) | Chore | The `docs/08_guides/tutorial-first-study.md` Step 12 sub-section *"In the morning — read the overnight result card"* shipped on PR #442 with prose only — no… | — | Idea — deferred FR-9 deliverable from PR #442 | +| 7 | P2 | [chore_pr_yml_parallelize_backend_job](planned_features/02_mvp2/chore_pr_yml_parallelize_backend_job/idea.md) | Chore | `.github/workflows/pr.yml` has a job named `backend (lint + typecheck + tests + coverage)` that runs four sequential things in one job: ruff/lint, mypy, the full pytest matrix (unit + integration + co | — | Idea — captured during PR #426 CI watch | +| 8 | P2 | [chore_solr_post_pipeline_followups](planned_features/02_mvp2/chore_solr_post_pipeline_followups/idea.md) | Chore | The 13-story `infra_adapter_solr` execution surfaced several follow-on items that fit neither the original spec nor any sister feature folder. None block the MVP2 Solr release — they're operator-exper | — | Idea — tangential observations from `infra_adapter_solr` end-to-end | +| 9 | P2 | [chore_ubi_hybrid_template_render](planned_features/02_mvp2/chore_ubi_hybrid_template_render/idea.md) | Chore | Idea — contract decision deferred (NOT a worker bug) | — | Idea — contract decision deferred (NOT a worker bug) | +| 10 | P2 | [bug_e2e_teardown_chain_node_delete_500](planned_features/02_mvp2/bug_e2e_teardown_chain_node_delete_500/idea.md) | Bug | The E2E global-teardown deletes seeded rows in a fixed order (per `chore_e2e_test_rows_isolation` Story 1.2 cleanup registration). 
For auto-followup **chains**, the seeded nodes are `queued` studies c | — | Idea — tangential discovery during `feat_overnight_autopilot` (Story 4.2 E2E, PR forthcoming) | +| 11 | P2 | [bug_relyloop_spec_ubi_section_drift](planned_features/02_mvp2/bug_relyloop_spec_ubi_section_drift/idea.md) | Bug | [`docs/00_overview/relyloop-spec.md`](relyloop-spec.md) §"Click-derived judgments — OpenSearch UBI as the engine-neutral primary path" (line ~706) carries two staleness bugs from the 2026-05-27 releas | — | Idea — captured during `feat_ubi_judgments` preflight (2026-05-29) | +| 12 | P2 | [bug_reseed_failure_blocks_retry_arq_singleton_dedup](planned_features/02_mvp2/bug_reseed_failure_blocks_retry_arq_singleton_dedup/idea.md) | Bug | `run_demo_reseed` is enqueued with a fixed Arq job id `demo_reseed:singleton` (the singleton concurrency guard). When a run reaches a terminal state, Arq stores its **result** under `arq:result:demo_r | — | Idea — tangential discovery while verifying `fix(demo): add Solr (8983) to the reseed engine host-URL mapping` (branch `feat_demo_reseed_solr_and_steplog`) | +| 13 | P2 | [bug_seed_meaningful_demos_silent_bulk_errors](planned_features/02_mvp2/bug_seed_meaningful_demos_silent_bulk_errors/idea.md) | Bug | [`scripts/seed_meaningful_demos.py:917-935`](../../scripts/seed_meaningful_demos.py#L917-L935) bulk-indexes 1000 Amazon ESCI products into a dedicated index per demo scenario: | — | Idea — captured during `bug_smoke_seed_es_unavailable_shards_race` Phase 2.5 tangential sweep | +| 14 | P2 | [bug_studies_detail_vitest_intermittent_timeout](planned_features/02_mvp2/bug_studies_detail_vitest_intermittent_timeout/idea.md) | Bug | Under the full `pnpm test` run (`vitest run`, default worker pool), the Study-detail-page render test sometimes blocks past the 5 s `testTimeout` default — but the test itself is data-driven from mock | — | Idea — captured during `chore_template_library_expansion` post-impl tangential sweep | +| 15 | P2 | 
[bug_webhook_concurrent_merge_race_timing_sensitive](planned_features/02_mvp2/bug_webhook_concurrent_merge_race_timing_sensitive/idea.md) | Bug | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | — | Idea — surfaced during `bug_demo_clusters_unreachable_in_healthz` PR #236 CI. | +| 16 | Backlog | [feat_fts_rank_ordering](planned_features/02_mvp2/feat_fts_rank_ordering/idea.md) | Feature | `feat_data_table_primitive` shipped filter-only FTS — `?q=foo` matches rows where `search_vector @@ plainto_tsquery('english', 'foo')` is true but orders results by `created_at DESC, id DESC` (the def | — | Idea — deferred from `feat_data_table_primitive` (MVP1) per spec §16. | +| 17 | Backlog | [feat_overnight_final_solution_phase3](planned_features/02_mvp2/feat_overnight_final_solution_phase3/idea.md) | Feature | When `follow_suggestions` runs a 4-link chain, today's proposal-creation logic ([`backend/workers/orchestrator.py`](../backend/workers/orchestrator.py) `_on_study_complete`) creates **one `pending` pr | — | Idea — deferred Phase 3 from `feat_overnight_final_solution` Phase 1 spec | +| 18 | Backlog | [infra_arq_subprocess_test](planned_features/02_mvp2/infra_arq_subprocess_test/idea.md) | Infra | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; | — | Idea (deferred from `feat_study_lifecycle` Phase 2 / PR #25 final GPT-5.5 review). Still applicable as of 2026-05-14: the three in-process tests cited below still cover the resume contract correctly; a subprocess test would add a narrow Arq-version-regression guard. 
| +| 19 | Backlog | [chore_auto_followup_parent_advisory_lock](planned_features/02_mvp2/chore_auto_followup_parent_advisory_lock/idea.md) | Chore | The shipped `feat_auto_followup_studies` worker uses a two-layer idempotency scheme: | — | Idea — captured as a standalone file to resolve broken cross-references in `feat_auto_followup_studies` D-11 + plan F2 + `bug_auto_followup_completed_parent_stop_chain_race/idea.md`. The slug was coined 2026-05-24 in D-11 but only existed as descriptive prose across other documents until now. | +| 20 | Backlog | [bug_chat_long_conversation_truncation](planned_features/02_mvp2/bug_chat_long_conversation_truncation/idea.md) | Bug | [`backend/app/services/agent_chat.send_user_message`](../../backend/app/services/agent_chat.py) defensively caps the OpenAI history at the most recent `HISTORY_MAX_MESSAGES = 100` messages… | — | Held for MVP2 (decided 2026-05-13). Folder renamed with `_mvp2` suffix to make the deferral visible at-a-glance in `ls docs/00_overview/planned_features/`. Resume work when MVP2 starts — no technical dependency on MVP2 infra (audit_log is N/A; Langfuse is convenience only); the deferral is scope discipline + zero current impact (latent bug, no operator has hit the 100-message cap). | ## Dependency graph diff --git a/docs/00_overview/backlog_dashboard.html b/docs/00_overview/backlog_dashboard.html index ef5f310a..36a540cb 100644 --- a/docs/00_overview/backlog_dashboard.html +++ b/docs/00_overview/backlog_dashboard.html @@ -398,12 +398,12 @@

BACKLOG Progress

Specced features done
0 / 1
-
0% specced · 3 filed under BACKLOG
+
0% specced · 5 filed under BACKLOG
Pending work
-
3
+
5
every not-done feat/infra/chore/bug across all priorities
@@ -430,12 +430,12 @@

BACKLOG Progress

Backlog
-
1
+
3
captured for record, not planned
Legacy "Path to BACKLOG"
-
3
+
5
scoped not-done + bugs + chore-ideas only (excludes feat/infra ideas)
@@ -463,7 +463,7 @@

Pipeline

-

Idea 2

+

Idea 4

@@ -488,6 +488,32 @@

Idea 2

PR #299 added stale-status auto-recovery to the demo-reseed POST handler ([`_test.py`](../backend/app/api/v1/_test.py)): when the Redis status is `running` but `started_at` is older than `DEMO_RESEED_
+
+ + +
+ +
+ Chore + Backlog + +
+
`list_recent_completed_chains` at [`backend/app/db/repo/study.py`](planned_features/implemented_features/2026_06_04_feat_overnight_studies_summary_card) executes a single SQL query of the shape:
+ + +
+ + +
+ +
+ Chore + Backlog + +
+
`GET /api/v1/studies/chains/recent` ships with a fixed `limit` ceiling of 50 and inert pagination fields kept on the wire for forward compatibility — `next_cursor` is always `null`, `has_more` is alwa
+ +
diff --git a/docs/00_overview/mvp2_dashboard.html b/docs/00_overview/mvp2_dashboard.html index 2c885f6e..1f222f6b 100644 --- a/docs/00_overview/mvp2_dashboard.html +++ b/docs/00_overview/mvp2_dashboard.html @@ -398,12 +398,12 @@

MVP2 Progress

Specced features done
16 / 26
-
62% specced · 49 filed under MVP2
+
62% specced · 50 filed under MVP2
Pending work
-
31
+
32
every not-done feat/infra/chore/bug across all priorities
@@ -425,7 +425,7 @@

MVP2 Progress

P2 (default)
-
26
+
27
important to file, not blocking
@@ -442,7 +442,7 @@

MVP2 Progress

Backlog ideas: - 5 idea-only feat/infra folders (not yet scoped into MVP2) + 6 idea-only feat/infra folders (not yet scoped into MVP2) In flight: @@ -463,7 +463,7 @@

Pipeline

-

Idea 19

+

Idea 20

@@ -478,6 +478,19 @@

Idea 19

+
+ +
+ Feature + P2 + +
+
The public website **relyloop.com** is an MkDocs Material site at [`website/`](../../website) (deployed via GitHub Pages by [deploy-docs.yml](../../.github/workflows/deploy-docs.yml), `mkdocs build --
+ + +
+ +
diff --git a/docs/00_overview/planned_features/02_mvp2/feat_website_walkthrough_guides/idea.md b/docs/00_overview/planned_features/02_mvp2/feat_website_walkthrough_guides/idea.md new file mode 100644 index 00000000..62f639c3 --- /dev/null +++ b/docs/00_overview/planned_features/02_mvp2/feat_website_walkthrough_guides/idea.md @@ -0,0 +1,60 @@ +# Walkthrough guides on the public website (relyloop.com) + +**Date:** 2026-06-04 +**Status:** Idea — user request (operator noticed relyloop.com lacks the guides that ship in the running app) +**Priority:** P2 +**Origin:** User request — "our published website https://relyloop.com/ does not contain the guides that are in the running app … add them to the running website, well suited for both web and mobile, fully resizable, with a version including video if possible." +**Depends on:** None (the source guide content already exists in `docs/08_guides/` and `ui/public/guides/`) + +## Problem + +The public website **relyloop.com** is an MkDocs Material site at [`website/`](../../../../../website/) (deployed via GitHub Pages by [deploy-docs.yml](../../../../../.github/workflows/deploy-docs.yml), `mkdocs build --strict`). It ships **no guides, no screenshots, and no video** — only prose pages (Getting Started, Concepts, Engines, etc.). + +Meanwhile the **internal Next.js app** (localhost only, never published) ships a rich guide system the public never sees: +- **4 long-form markdown guides** in [`docs/08_guides/`](../../../../../docs/08_guides/): `tutorial-first-study.md`, `quick-tour.md`, `workflows-overview.md`, `llm-endpoint-setup.md`. +- **10 screenshot walkthrough decks** in [`ui/public/guides//`](../../../../../ui/public/guides/) — each with a `metadata.json` (title, description, estimated_time, tags, `screenshots[]` of `{file, caption}`, optional `video`), ~48 numbered PNGs total, and a `walkthrough.webm` slow-motion video. + +Prospective users evaluating RelyLoop on relyloop.com get none of this onboarding/visual material. 
The gap is pure surfacing — the content already exists and is maintained; it simply never reaches the published site. + +## Proposed capabilities + +User-confirmed decisions (2026-06-04): ship all three slices together; port **all 4** long-form guides; provide **MP4 + WebM** video (WebM alone does not play on iOS Safari). + +### Surface the 10 walkthrough decks on the website +- New top-level **Guides** nav tab with a **Walkthroughs** section + an overview/card-grid index page. +- One generated MkDocs page per deck: title, an estimated-time/tags admonition, the responsive video (when present), then each screenshot with its caption. +- Fully resizable / zoomable images via the **mkdocs-glightbox** plugin (auto pinch-zoom lightbox, mobile-friendly, alt text as caption — the MkDocs Material standard). +- Responsive on web and mobile (Material is responsive by default; add screenshot/video CSS so nothing overflows at phone width). + +### Embedded video, mobile-safe +- Responsive HTML5 `
+ {/* + feat_overnight_studies_summary_card Story 2.2 — "Ran while you + were away" card. Self-contained (owns its own data + visited- + state); early-returns null on empty / error / pending so the + studies table below always renders predictably. + */} + {targetFromUrl && (
Active filters: diff --git a/ui/src/components/studies/recent-chains-card.tsx b/ui/src/components/studies/recent-chains-card.tsx new file mode 100644 index 00000000..2b5386de --- /dev/null +++ b/ui/src/components/studies/recent-chains-card.tsx @@ -0,0 +1,163 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +'use client'; + +/** + * `<RecentChainsCard />` — feat_overnight_studies_summary_card Story 2.2. + * + * The "Ran while you were away" card that surfaces recently-completed + * overnight chains at the top of `/studies` (FR-1, FR-3, FR-4, FR-5, + * FR-6). + * + * The card is self-contained: + * + * - Owns its data via `useRecentChains(since)`, where `since` comes + * from `useStudiesVisited()`. Does NOT depend on the page's + * `useStudies()` query. + * - Early-returns `null` on pending / error / empty so it never blocks + * the studies table beneath it (best-effort discoverability per + * spec §10 "Failure modes"). + * - "Got it" calls `dismiss(maxTailCompletedAt)` which writes + * `max(tail_completed_at) + 1ms` to localStorage; the next query + * refetch returns an empty list and the card unmounts (FR-5). + * + * Stop-reason phrasing reuses `CHAIN_STOP_REASON_PHRASE` from + * `ui/src/lib/chain-stop-reason.ts` — the same Map shipped with + * `feat_overnight_final_solution_phase2` (Story 1 / FR-8) so both the + * chain panel and this card stay aligned on a single source of truth + * for the six wire values defined in + * `backend/app/domain/study/chain_summary.py` `CHAIN_STOP_REASONS`.
+ */ + +import Link from 'next/link'; + +import { InfoTooltip } from '@/components/common/info-tooltip'; +import { Button } from '@/components/ui/button'; +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; +import { useStudiesVisited } from '@/hooks/use-studies-visited'; +import { useRecentChains, type RecentChainSummary } from '@/lib/api/studies'; +import { CHAIN_STOP_REASON_PHRASE } from '@/lib/chain-stop-reason'; +import { formatSignedLift } from '@/lib/format-lift'; + +function formatBestMetric(value: number | null | undefined): string { + if (value === null || value === undefined) return '—'; + return value.toFixed(4); +} + +interface ChainRowProps { + row: RecentChainSummary; +} + +function ChainRow({ row }: ChainRowProps) { + // CHAIN_STOP_REASON_PHRASE is typed Record so this + // lookup is exhaustive under TypeScript. The runtime `?? 'Chain stopped'` + // is defensive: if the backend ever ships a NEW wire value before the + // frontend redeploys, we render a generic phrase rather than leak the raw + // enum (e.g. "no_lift_v2") to the user. + const stopPhrase = CHAIN_STOP_REASON_PHRASE[row.stop_reason] ?? 'Chain stopped'; + const hasMetric = row.best_metric !== null && row.best_metric !== undefined; + + return ( +
  • +
    + + {row.anchor_name} + + {row.chain_length} studies +
    + {hasMetric ? ( +
    + + Best {row.objective_metric || 'metric'}:{' '} + {formatBestMetric(row.best_metric)} + + + Lift:{' '} + + {formatSignedLift(row.cumulative_lift)} + + + Stopped: {stopPhrase} +
    + ) : ( + // Null-metric branch (AC-11): the chain has no surfaceable best + // metric (e.g. terminal-failed tail). Drop the numeric line + // entirely and lead with the stop-reason phrase so the row + // reads as "the chain ended without a winning trial" rather + // than "best — / lift —". +
    Stopped: {stopPhrase}
    + )} +
  • + ); +} + +export function RecentChainsCard(): React.ReactNode { + const { since, dismiss } = useStudiesVisited(); + const query = useRecentChains(since); + + // Best-effort discoverability — pending / error / empty all collapse + // to `null` so the studies table beneath always renders predictably + // (FR-3 + spec §10 failure modes). + if (query.isPending) return null; + if (query.isError) return null; + const rows = query.data?.data ?? []; + if (rows.length === 0) return null; + + const tailTimes = rows + .map((r) => Date.parse(r.tail_completed_at)) + .filter((n) => Number.isFinite(n)); + // Defensive: tailTimes should always be non-empty when rows is, but + // guard against a malformed timestamp slipping through (the dismiss + // hook already silently no-ops on NaN, so this is belt-and-suspenders). + const maxTail = + tailTimes.length > 0 + ? new Date(Math.max(...tailTimes)).toISOString() + : rows[rows.length - 1]?.tail_completed_at; + + const handleDismiss = (): void => { + if (maxTail !== undefined) { + dismiss(maxTail); + } + }; + + return ( + + +
    + + Ran while you were away + + +

    + Overnight follow-up chains that + completed since your last visit. +

    +
    + +
    + +
      + {rows.map((row) => ( + + ))} +
    +
    +
    + ); +} diff --git a/ui/src/hooks/use-studies-visited.ts b/ui/src/hooks/use-studies-visited.ts new file mode 100644 index 00000000..3a153c8f --- /dev/null +++ b/ui/src/hooks/use-studies-visited.ts @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +'use client'; + +/** + * `useStudiesVisited` — feat_overnight_studies_summary_card Story 2.1. + * + * Tracks "when did the operator last visit `/studies`" in localStorage so + * the "Ran while you were away" card can scope its discovery query to + * chains whose tails completed AFTER that cutoff (FR-5). + * + * ## Return shape + * + * - `since: string` — ISO-8601 timestamp. On the first visit (no value + * in localStorage yet), defaults to `now − 7 days` so the card shows + * a sensible week's worth of history rather than every chain ever. + * - `dismiss(maxTailCompletedAt: string): void` — stores + * `maxTailCompletedAt + 1ms` as the new cutoff so the card unmounts + * on next refetch. The +1ms exclusive nudge prevents the same chain + * from re-appearing if the operator dismisses then reloads (the + * endpoint's `since` filter is inclusive — `completed_at >= since`). + * + * ## SSR safety + * + * The hook reads localStorage in a `useState` lazy initializer guarded + * by `typeof window`; a server render falls back to the default + * 7-day-ago timestamp, while on the client the same initializer reads + * localStorage directly (no follow-up effect). Matches the pattern in `useLocalStorageSet` + * (`ui/src/hooks/use-local-storage-set.ts`).
+ */ + +import { useCallback, useState } from 'react'; + +const STORAGE_KEY = 'relyloop.last_visited_studies_at'; +const SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1000; + +function defaultSince(): string { + return new Date(Date.now() - SEVEN_DAYS_MS).toISOString(); +} + +function readVisitedAt(): string { + if (typeof window === 'undefined') return defaultSince(); + try { + const raw = window.localStorage.getItem(STORAGE_KEY); + // Guard against a corrupt localStorage value (operator manual edit, + // partial write, stored value from an older release with a different + // shape). An invalid date would otherwise propagate to + // GET /api/v1/studies/chains/recent?since= → 422 cascade. + // Per Gemini Code Assist PR-444 finding #4. + if (raw && !Number.isNaN(Date.parse(raw))) return raw; + } catch { + // Private browsing / quota / corrupt — fall back to default. + } + return defaultSince(); +} + +function writeVisitedAt(value: string): void { + if (typeof window === 'undefined') return; + try { + window.localStorage.setItem(STORAGE_KEY, value); + } catch { + // Quota / private browsing — silently drop the write. + } +} + +export interface UseStudiesVisitedResult { + since: string; + dismiss: (maxTailCompletedAt: string) => void; +} + +export function useStudiesVisited(): UseStudiesVisitedResult { + // Hydrate synchronously via the useState initializer — matches the + // pattern in useLocalStorageSet (no extra render cycle, safe under + // SSR because readVisitedAt() guards on `typeof window`). + const [since, setSince] = useState(() => readVisitedAt()); + + const dismiss = useCallback((maxTailCompletedAt: string): void => { + const parsed = Date.parse(maxTailCompletedAt); + if (Number.isNaN(parsed)) { + // Defensive: a malformed input MUST NOT throw a render. Skip the + // dismissal entirely — the card stays visible. Operator can dismiss + // again on the next refetch. 
+ return; + } + // +1ms exclusive nudge so the inclusive `since` filter doesn't + // re-show the just-dismissed chain (FR-5). + const next = new Date(parsed + 1).toISOString(); + writeVisitedAt(next); + setSince(next); + }, []); + + return { since, dismiss }; +} diff --git a/ui/src/lib/api/studies.ts b/ui/src/lib/api/studies.ts index 05576ed7..ae258bc8 100644 --- a/ui/src/lib/api/studies.ts +++ b/ui/src/lib/api/studies.ts @@ -23,6 +23,8 @@ export type TrialListResponse = components['schemas']['TrialListResponse']; export type CreateStudyRequest = components['schemas']['CreateStudyRequest']; export type StudyChainResponse = components['schemas']['StudyChainResponse']; export type StudyChainLink = components['schemas']['StudyChainLink']; +export type RecentChainSummary = components['schemas']['RecentChainSummary']; +export type RecentChainsResponse = components['schemas']['RecentChainsResponse']; /** Single-page list response augmented with the parsed `X-Total-Count` header. */ export type StudyListPage = StudyListResponse & { totalCount: number }; @@ -237,3 +239,42 @@ export function useStudyChain( refetchOnReconnect: true, }); } + +// ============================================================================= +// feat_overnight_studies_summary_card Story 2.1 — recent-chains discovery hook +// ============================================================================= + +export interface UseRecentChainsOptions { + enabled?: boolean; +} + +/** + * Fetch recently-completed overnight chains for the "Ran while you were + * away" card on `/studies` (FR-1). + * + * `since` is the cutoff supplied by `useStudiesVisited()` — chains whose + * tail completed at or after this ISO-8601 timestamp are returned. The + * requested page size of 20 stays within the backend's `limit` ceiling (50) for the + * v1 endpoint; pagination is inert (`next_cursor: null`, `has_more: + * false`) per OQ-2.
+ * + * Refetch contract: window focus + reconnect only, no aggressive + * polling — the card is best-effort discoverability, not a live feed. + */ +export function useRecentChains( + since: string, + options: UseRecentChainsOptions = {}, +): UseQueryResult { + return useQuery({ + queryKey: ['studies', 'recent-chains', since], + queryFn: async () => { + const { data } = await apiClient.get('/api/v1/studies/chains/recent', { + params: { since, limit: 20 }, + }); + return data; + }, + refetchOnWindowFocus: true, + refetchOnReconnect: true, + enabled: options.enabled ?? true, + }); +} diff --git a/ui/src/lib/glossary.ts b/ui/src/lib/glossary.ts index 32cf8e82..d2d07a4f 100644 --- a/ui/src/lib/glossary.ts +++ b/ui/src/lib/glossary.ts @@ -983,6 +983,21 @@ export const glossary = { ].join('\n'), ariaLabel: 'More information about the auto-followup strategy line', }, + // feat_overnight_studies_summary_card Story 2.3 / FR-6 — new key for the + // "Ran while you were away" card on the /studies index page. Compresses + // the spec §11 tooltip-inventory copy into ≤140-char short + 800-char long. + recent_chains_card: { + short: + "RelyLoop ran follow-up studies overnight. This card shows chains that finished since your last visit; 'Got it' hides it.", + long: [ + 'Appears at the top of `/studies` when one or more auto-followup chains have completed (at least 2 linked studies, terminated) since the last time you visited this page. Each row summarizes one chain: the anchor study, how many links it ran, the best metric the chain produced, the total lift over the anchor baseline, and why the chain stopped.', + '', + 'Click `Review chain` to open the anchor study and inspect the full chain panel. 
Click `Got it` to dismiss every row currently shown — the card stays hidden until a brand-new chain finishes after that dismissal.', + '', + 'The card is best-effort: when the backend is unreachable or no chains have finished since your last visit, it simply does not render — it never blocks the studies table from loading.', + ].join('\n'), + ariaLabel: 'More information about the recent chains card', + }, auto_followup_chain: { short: 'RelyLoop ran follow-up studies automatically based on this study’s winner. Each follow-up narrowed the search bounds.', diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index 719a0345..3dd484b8 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -1133,6 +1133,41 @@ export interface paths { patch?: never; trace?: never; }; + "/api/v1/studies/chains/recent": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Get Recent Chains + * @description List recently-completed overnight chains (FR-1, AC-1/2/3/4/5/6/11/12). + * + * Returns the deduplicated set of completed overnight chains (length + * >= 2) ordered newest-tail-completion-first, capped at ``limit``. The + * ``since`` filter restricts to chains whose tail completed at or + * after the cutoff (used by the card to seed the "what's new since I + * last visited" query). + * + * Malformed ``since`` / out-of-range ``limit`` flow through the + * global ``validation_exception_handler`` and return the canonical + * 422 ``VALIDATION_ERROR`` envelope (no manual parse path). + * + * Pagination: inert. ``next_cursor=null`` and ``has_more=false`` + * always — OQ-2 resolved limit-cap-only for v1. Keyset pagination + * deferred to a separate ``chore_`` idea filed against the spec's + * open questions. 
+ */ + get: operations["get_recent_chains_api_v1_studies_chains_recent_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/v1/studies/{study_id}": { parameters: { query?: never; @@ -2907,6 +2942,69 @@ export interface components { /** Version */ version: number; }; + /** + * RecentChainSummary + * @description One row in the ``GET /api/v1/studies/chains/recent`` response. + * + * Per spec §8.1 (feat_overnight_studies_summary_card). Per-chain + * rollup feeding the "Ran while you were away" card on ``/studies`` + * — anchor identity + chain length + the best link's metric + the + * chain's cumulative lift + the derived stop reason + the + * surfaceable proposal id for the best link. Read-only; no state + * transitions, no audit events. + */ + RecentChainSummary: { + /** Anchor Name */ + anchor_name: string; + /** Anchor Study Id */ + anchor_study_id: string; + /** Best Link Proposal Id */ + best_link_proposal_id: string | null; + /** Best Metric */ + best_metric: number | null; + /** Chain Length */ + chain_length: number; + /** Cumulative Lift */ + cumulative_lift: number | null; + /** + * Direction + * @enum {string} + */ + direction: "maximize" | "minimize"; + /** Objective Metric */ + objective_metric: string; + /** + * Stop Reason + * @enum {string} + */ + stop_reason: "depth_exhausted" | "no_lift" | "budget" | "parent_failed" | "cancelled" | "in_flight"; + /** + * Tail Completed At + * Format: date-time + */ + tail_completed_at: string; + }; + /** + * RecentChainsResponse + * @description ``GET /api/v1/studies/chains/recent`` response shape. + * + * Inert pagination: this endpoint emits ``next_cursor=null`` and + * ``has_more=false`` always (OQ-2 resolved — limit-cap only). The + * fields stay on the wire for consistency with the rest of the + * studies surface, so a future MVP3 keyset-pagination story can + * populate them without breaking clients (idea filed in this PR). 
+ */ + RecentChainsResponse: { + /** Data */ + data: components["schemas"]["RecentChainSummary"][]; + /** + * Has More + * @default false + */ + has_more: boolean; + /** Next Cursor */ + next_cursor?: string | null; + }; /** * RegressorRowShape * @description One row in the named-regressors or named-improvers table. @@ -5641,6 +5739,38 @@ export interface operations { }; }; }; + get_recent_chains_api_v1_studies_chains_recent_get: { + parameters: { + query?: { + since?: string | null; + limit?: number; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["RecentChainsResponse"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; get_study_detail_api_v1_studies__study_id__get: { parameters: { query?: never; diff --git a/ui/tests/e2e/recent-chains-card.spec.ts b/ui/tests/e2e/recent-chains-card.spec.ts new file mode 100644 index 00000000..f006ff36 --- /dev/null +++ b/ui/tests/e2e/recent-chains-card.spec.ts @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * E2E spec: "Ran while you were away" card on /studies + * (feat_overnight_studies_summary_card Story 3.1). + * + * Real-backend Playwright coverage of the card's mount + review-link + * behavior: + * + * - Seeds a 2+ link chain whose leaf is terminal (`inFlightLeaf=false`) + * via the `seedAutoFollowupChain` helper, then sets the localStorage + * visited cutoff to "before" the chain via `page.addInitScript()` so + * the card has rows to show. + * - Asserts the card renders with the anchor name and a clickable + * review-chain link pointing at `/studies/{root_id}`. 
+ * - Asserts "Got it" dismisses the card so it's gone after a reload + * (FR-5 — the +1ms exclusive nudge prevents re-show). + * + * Per CLAUDE.md E2E rules: no `page.route()` mocking of backend; assertions + * verify browser-visible behavior via `page`; `request` is used only via + * the seed helpers for test setup. + * + * Limitation: the seed-chain endpoint creates chain links without + * `selected_followup_kind` (legacy narrow per Phase 1 D-12), and the + * leaf may or may not carry a usable `best_metric` — the test asserts + * card visibility + anchor link target, not the per-row metric content. + * Metric / null-metric / stop-reason rendering is covered exhaustively + * by the vitest component suite in `__tests__/components/studies/ + * recent-chains-card.test.tsx`. + */ + +import { expect, test } from '@playwright/test'; + +import { seedAutoFollowupChain, seedFullChain } from './helpers/seed'; + +test.describe('/studies — Recent chains card', () => { + test('renders the card with a working Review chain link after a terminated chain', async ({ + page, + }) => { + // Seed a 3-link chain (depth=2 = parent + middle + leaf), all completed + // (leaf not in-flight). The chain endpoint sees the leaf as terminal, + // so the discovery query returns one row for the chain. + const fixture = await seedFullChain(2); + const { rootId } = await seedAutoFollowupChain({ + clusterId: fixture.clusterId, + querySetId: fixture.querySetId, + templateId: fixture.templateId, + judgmentListId: fixture.judgmentListId, + depth: 2, + inFlightLeaf: false, + inFlightMiddle: false, + }); + + // Force the visited-state cutoff into the past so the card shows the + // chain that was just seeded — the default 7-day window already + // covers it, but this is belt-and-suspenders and decouples the test + // from clock skew between the seeder + the browser tab. 
+ await page.addInitScript(() => { + window.localStorage.setItem( + 'relyloop.last_visited_studies_at', + '2000-01-01T00:00:00.000Z', + ); + }); + + await page.goto('/studies'); + + // Card visible. + const card = page.getByTestId('recent-chains-card'); + await expect(card).toBeVisible(); + await expect(card.getByText('Ran while you were away')).toBeVisible(); + + // The anchor link points at /studies/{rootId} (the chain's anchor is + // the root, NOT the leaf — the discovery query dedupes by anchor). + const anchorLink = card.getByTestId(`recent-chains-card-anchor-link-${rootId}`); + await expect(anchorLink).toBeVisible(); + await expect(anchorLink).toHaveAttribute('href', `/studies/${rootId}`); + + // Click through and confirm navigation. The study page summary is + // the canonical "we landed on the right page" assertion (matches + // the existing overnight-result-card spec). + await anchorLink.click(); + await expect(page).toHaveURL(new RegExp(`/studies/${rootId}$`)); + await expect(page.getByTestId('study-page-summary')).toBeVisible(); + }); + + test('"Got it" dismisses the card; it stays hidden after reload (FR-5)', async ({ page }) => { + const fixture = await seedFullChain(2); + await seedAutoFollowupChain({ + clusterId: fixture.clusterId, + querySetId: fixture.querySetId, + templateId: fixture.templateId, + judgmentListId: fixture.judgmentListId, + depth: 2, + inFlightLeaf: false, + inFlightMiddle: false, + }); + + await page.addInitScript(() => { + window.localStorage.setItem( + 'relyloop.last_visited_studies_at', + '2000-01-01T00:00:00.000Z', + ); + }); + + await page.goto('/studies'); + + // Card visible first. + const card = page.getByTestId('recent-chains-card'); + await expect(card).toBeVisible(); + + // Dismiss + verify the card unmounts on the next refetch (the query + // key includes `since`, so the new cutoff produces an empty list). 
+ await card.getByTestId('recent-chains-card-dismiss').click(); + await expect(page.getByTestId('recent-chains-card')).toHaveCount(0); + + // Reload — visited cutoff persists in localStorage; the card stays + // gone because the chain's tail completed_at is now < since. + await page.reload(); + await expect(page.getByTestId('recent-chains-card')).toHaveCount(0); + }); +}); diff --git a/website/docs/roadmap.md b/website/docs/roadmap.md index fc5dc5b3..9860fb3a 100644 --- a/website/docs/roadmap.md +++ b/website/docs/roadmap.md @@ -185,6 +185,7 @@ - 🟡 [Query Normalization Tuning](https://github.com/SoundMindsAI/relyloop/tree/main/docs/00_overview/planned_features/02_mvp2/feat_query_normalization_tuning) - 🟡 [Query Normalizer Typed Pipeline](https://github.com/SoundMindsAI/relyloop/tree/main/docs/00_overview/planned_features/02_mvp2/feat_query_normalizer_typed_pipeline) - 🟡 [UBI LLM Study Comparison](https://github.com/SoundMindsAI/relyloop/tree/main/docs/00_overview/planned_features/02_mvp2/feat_ubi_llm_study_comparison) +- 🟡 [Website Walkthrough Guides](https://github.com/SoundMindsAI/relyloop/tree/main/docs/00_overview/planned_features/02_mvp2/feat_website_walkthrough_guides) ??? note "Infrastructure & tooling (7)"