diff --git a/docs/00_overview/implemented_features/2026_06_02_feat_studies_convergence_visibility/implementation_plan.md b/docs/00_overview/implemented_features/2026_06_02_feat_studies_convergence_visibility/implementation_plan.md index 0ee7fbb6..0dd636f8 100644 --- a/docs/00_overview/implemented_features/2026_06_02_feat_studies_convergence_visibility/implementation_plan.md +++ b/docs/00_overview/implemented_features/2026_06_02_feat_studies_convergence_visibility/implementation_plan.md @@ -278,7 +278,7 @@ No migration (§3.5 migration verification N/A). **Epic 1 (shipped to main via PR #421 `e5c3b8b9` squash-merge, 2026-06-02):** - [x] Story 1.1 — backend list fields (`b90d5477` on the feat branch, landed on main via PR #421; 8 unit + 7 integration + 3 contract tests green; latent `_summary` invalid-direction crash fixed inline as a tangential discovery surfaced by AC-3b; Epic-1 phase-review F1 parity bug (count-gate `primary_metric IS NOT NULL`) fixed in `a0c40d37`) -- [x] Story 1.2 — frontend Trials + Convergence columns (`ed5ca276` on the feat branch, landed on main via PR #421; 1012 vitest, build clean, E2E spec) +- [x] Story 1.2 — frontend Trials + Convergence columns (`ed5ca276` on the feat branch). **CORRECTION (2026-06-03):** this commit did NOT actually land on `main` — it was dropped during the PR #421/#422 rebase that de-duplicated Epic 1 commits (the Story 1.1 *backend* fields survived in `b90d5477`, but the Story 1.2 *frontend* columns in `ed5ca276` were lost). The `/studies` list shipped with only its original 6 columns; `trial_count` + `convergence_verdict` sat returned-but-unsurfaced. Restored 2026-06-03 by cherry-picking `ed5ca276` in `feat_studies_list_trial_convergence_columns` — which also added the `TooltipProvider` wrapper to `page.test.tsx` that the header `tooltipKey` requires (a gap the lost commit never exercised against CI, confirming it never ran the full suite on main). - [x] Epic 1 phase gate — GPT-5.5 review: 5 findings (4 accepted+fixed `a0c40d37`, F4 rejected with counter-evidence); re-review clean. **Epic 2 (in flight as PR #422):** diff --git a/state_history.md b/state_history.md index 12d0c7eb..9165d978 100644 --- a/state_history.md +++ b/state_history.md @@ -272,6 +272,16 @@ backend + frontend code. **Epic 2** (demo data enrichment) shipped via compact labels `Converged`/`Improving`/`Too few trials`/em-dash. The verdict→badge map is `satisfies Record` so a missing/extra verdict is a compile error. + - **CORRECTION (2026-06-03):** the frontend columns described in this + bullet (commit `ed5ca276`) did NOT actually ship with PR #421 — they + were dropped in the PR #421/#422 rebase that de-duplicated Epic 1 + commits. Only the Story 1.1 *backend* fields (`b90d5477`) landed; the + `/studies` list ran with its original 6 columns and `trial_count` + + `convergence_verdict` sat returned-but-unsurfaced for ~30 hours. + Discovered + restored 2026-06-03 (`feat_studies_list_trial_convergence_columns`) + by cherry-picking `ed5ca276`, which also caught a `TooltipProvider` + gap in `page.test.tsx` the lost commit never exercised — confirming + `ed5ca276` never ran the full suite on main. **Epic 2 — what shipped (PR #422):** diff --git a/ui/public/guides/06_create_and_monitor_study/01-studies-list.png b/ui/public/guides/06_create_and_monitor_study/01-studies-list.png index b0aa6f26..6908d986 100644 Binary files a/ui/public/guides/06_create_and_monitor_study/01-studies-list.png and b/ui/public/guides/06_create_and_monitor_study/01-studies-list.png differ diff --git a/ui/public/guides/06_create_and_monitor_study/03-create-study-modal.png b/ui/public/guides/06_create_and_monitor_study/03-create-study-modal.png index e180efe3..e44f06ce 100644 Binary files a/ui/public/guides/06_create_and_monitor_study/03-create-study-modal.png and b/ui/public/guides/06_create_and_monitor_study/03-create-study-modal.png differ diff --git a/ui/public/guides/06_create_and_monitor_study/04-study-detail.png b/ui/public/guides/06_create_and_monitor_study/04-study-detail.png index c8268e38..b39770a8 100644 Binary files a/ui/public/guides/06_create_and_monitor_study/04-study-detail.png and b/ui/public/guides/06_create_and_monitor_study/04-study-detail.png differ diff --git a/ui/public/guides/06_create_and_monitor_study/walkthrough.webm b/ui/public/guides/06_create_and_monitor_study/walkthrough.webm index 4ebfd13d..41f5168a 100644 Binary files a/ui/public/guides/06_create_and_monitor_study/walkthrough.webm and b/ui/public/guides/06_create_and_monitor_study/walkthrough.webm differ diff --git a/ui/src/__tests__/app/studies/page.test.tsx b/ui/src/__tests__/app/studies/page.test.tsx index cd7311fe..3adca513 100644 --- a/ui/src/__tests__/app/studies/page.test.tsx +++ b/ui/src/__tests__/app/studies/page.test.tsx @@ -8,6 +8,8 @@ import { http, HttpResponse } from 'msw'; import { type ReactNode } from 'react'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { TooltipProvider } from '@/components/ui/tooltip'; + import { server } from '../../setup'; const API_BASE = 'http://api.test'; @@ -39,8 +41,15 @@ async function renderPage() { const qc = new QueryClient({ defaultOptions: { queries: { retry: false } } }); const { default: StudiesPage } = await import('@/app/studies/page'); return render( + // TooltipProvider mirrors the real app shell (app/layout.tsx wraps the + // whole tree in one). The studies-table Trials/Convergence columns render + // an in the column HEADER (via tooltipKey), and Radix's + // Tooltip throws without a provider ancestor — so this isolated page + // render needs the provider the layout would otherwise supply. - + + + , ); } @@ -65,6 +74,13 @@ function studyRows(count = 2) { direction: 'maximize' as const, created_at: '2026-05-12T00:00:00Z', completed_at: null, + // trial_count (required) + convergence_verdict (nullable) are part of + // the StudySummary wire shape since feat_studies_convergence_visibility + // (PR #421); the Trials + Convergence list columns + // (feat_studies_list_trial_convergence_columns) render them, so the mock + // must carry them or the Trials cell hits undefined.toLocaleString(). + trial_count: i * 3, + convergence_verdict: null, })); } diff --git a/ui/src/__tests__/components/studies/studies-table-ceiling-badge.test.tsx b/ui/src/__tests__/components/studies/studies-table-ceiling-badge.test.tsx index 135e59b0..5aa2560c 100644 --- a/ui/src/__tests__/components/studies/studies-table-ceiling-badge.test.tsx +++ b/ui/src/__tests__/components/studies/studies-table-ceiling-badge.test.tsx @@ -37,14 +37,13 @@ function renderBestMetricCell(overrides: Partial) { direction: 'maximize', created_at: '2026-05-29T00:00:00Z', completed_at: '2026-05-29T00:05:00Z', - // `trial_count` is a required `int = 0` field on the backend (per - // `backend/app/api/v1/schemas.py:902`, shipped with - // `feat_studies_convergence_visibility` / PR #421). The freshness- - // gate regen of `types.ts` surfaced that this fixture omitted it - // (it had been working by accident against a stale committed - // types.ts that didn't yet reflect the new required field) — - // tangential fix while shipping `infra_generated_artifact_freshness_gate`. + // `trial_count` (required) + `convergence_verdict` (nullable) are part + // of the StudySummary wire shape since `feat_studies_convergence_visibility` + // (PR #421). The Trials + Convergence list columns render them, so the + // fixture must carry both — value is immaterial to the ceiling-badge + // assertion, which only exercises best_metric. trial_count: 0, + convergence_verdict: null, ...overrides, }; // The DataTable cell renderer only reads `row.original`; a minimal stub diff --git a/ui/src/__tests__/components/studies/studies-table-convergence-column.test.tsx b/ui/src/__tests__/components/studies/studies-table-convergence-column.test.tsx new file mode 100644 index 00000000..d0821791 --- /dev/null +++ b/ui/src/__tests__/components/studies/studies-table-convergence-column.test.tsx @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * Tests for the studies-list Trials + Convergence columns + * (feat_studies_convergence_visibility Story 1.2 / FR-4). + * + * Verifies: + * - the trial_count cell renders the numeric count; + * - the convergence badge renders the correct compact label/variant for + * each verdict, sourced from CONVERGENCE_VERDICT_VALUES; + * - a null verdict renders an em-dash (no badge), matching the detail + * panel's null-state behavior. + */ +import { render, screen } from '@testing-library/react'; +import { type ReactNode } from 'react'; +import { describe, expect, it } from 'vitest'; + +import { studiesColumns } from '@/components/studies/studies-table.column-config'; +import { TooltipProvider } from '@/components/ui/tooltip'; +import type { StudySummary } from '@/lib/api/studies'; +import { CONVERGENCE_VERDICT_VALUES } from '@/lib/enums'; + +function baseStudy(overrides: Partial): StudySummary { + return { + id: 'study-1', + name: 'demo', + cluster_id: 'c1', + status: 'completed', + best_metric: 0.8, + direction: 'maximize', + created_at: '2026-05-29T00:00:00Z', + completed_at: '2026-05-29T00:05:00Z', + trial_count: 50, + convergence_verdict: null, + ...overrides, + }; +} + +function renderCell(columnId: string, original: StudySummary) { + const column = studiesColumns.find((c) => c.id === columnId); + if (!column?.cell || typeof column.cell !== 'function') { + throw new Error(`${columnId} column or its cell renderer not found`); + } + const cell = column.cell as (ctx: { row: { original: StudySummary } }) => ReactNode; + return render({cell({ row: { original } })}); +} + +describe('studies-table Trials column', () => { + it('renders the non-baseline trial count', () => { + renderCell('trial_count', baseStudy({ trial_count: 37 })); + expect(screen.getByText('37')).toBeInTheDocument(); + }); + + it('renders zero for a queued study', () => { + renderCell('trial_count', baseStudy({ status: 'queued', trial_count: 0 })); + expect(screen.getByText('0')).toBeInTheDocument(); + }); +}); + +describe('studies-table Convergence column', () => { + const cases: Array<[StudySummary['convergence_verdict'], string]> = [ + ['converged', 'Converged'], + ['still_improving', 'Improving'], + ['too_few_trials', 'Too few trials'], + ]; + + it.each(cases)('renders the %s verdict as "%s"', (verdict, label) => { + const { container } = renderCell( + 'convergence_verdict', + baseStudy({ convergence_verdict: verdict }), + ); + expect(screen.getByText(label)).toBeInTheDocument(); + // The data-verdict attribute carries the wire value for downstream + // assertions / E2E hooks. + expect(container.querySelector(`[data-verdict="${verdict}"]`)).not.toBeNull(); + }); + + it('renders an em-dash for a null verdict (no badge)', () => { + const { container } = renderCell( + 'convergence_verdict', + baseStudy({ convergence_verdict: null }), + ); + expect(screen.getByText('—')).toBeInTheDocument(); + expect(container.querySelector('[data-verdict]')).toBeNull(); + }); + + it('has a label for every backend verdict literal (no missing badge)', () => { + // Source-of-truth guard: every value in CONVERGENCE_VERDICT_VALUES must + // render a non-em-dash badge. A backend verdict added without a label + // here would slip through TypeScript only if the `satisfies` map drifted; + // this is the runtime backstop. + for (const verdict of CONVERGENCE_VERDICT_VALUES) { + const { container, unmount } = renderCell( + 'convergence_verdict', + baseStudy({ convergence_verdict: verdict }), + ); + expect(container.querySelector(`[data-verdict="${verdict}"]`)).not.toBeNull(); + unmount(); + } + }); + + it('falls back to an em-dash for an unmapped verdict (forward-compat guard)', () => { + // Regression test for the Gemini PR #438 finding (accepted): a newer + // backend could emit a convergence_verdict this frontend snapshot doesn't + // map (rolling deploy). The cast simulates that out-of-union runtime value + // — TypeScript would never let it through the typed path, but the wire can. + // The cell must render the same em-dash as the null state, NOT crash on + // `badge.variant`. + const unknown = 'diverged' as StudySummary['convergence_verdict']; + const { container } = renderCell( + 'convergence_verdict', + baseStudy({ convergence_verdict: unknown }), + ); + expect(screen.getByText('—')).toBeInTheDocument(); + // No badge rendered for the unmapped value. + expect(container.querySelector('[data-verdict]')).toBeNull(); + }); +}); diff --git a/ui/src/components/studies/studies-table.column-config.tsx b/ui/src/components/studies/studies-table.column-config.tsx index e66b9faf..967205c8 100644 --- a/ui/src/components/studies/studies-table.column-config.tsx +++ b/ui/src/components/studies/studies-table.column-config.tsx @@ -21,6 +21,7 @@ import { Badge } from '@/components/ui/badge'; import type { DataTableColumnDef } from '@/components/common/types'; import type { StudySummary } from '@/lib/api/studies'; import { STUDY_STATUS_VALUES } from '@/lib/enums'; +import type { ConvergenceVerdict } from '@/lib/enums'; /** * Threshold heuristic for the "ceiling" badge — when `best_metric` is at or @@ -31,6 +32,25 @@ import { STUDY_STATUS_VALUES } from '@/lib/enums'; */ const METRIC_CEILING_THRESHOLD = 0.99; +/** + * Compact convergence-verdict badge map for the studies-list column + * (feat_studies_convergence_visibility Story 1.2 / FR-4). Typed + * `satisfies Record` so a verdict added to (or + * removed from) the backend `ConvergenceVerdict` Literal — mirrored by + * `CONVERGENCE_VERDICT_VALUES` in `@/lib/enums` — becomes a COMPILE error + * here, not a silent missing badge. The detail page's `` + * uses the fuller labels ("Still improving when it stopped", "Too few + * trials to tell"); the dense list uses these compact forms. + * + * Wire values must match backend/app/domain/study/convergence.py + * ConvergenceVerdict (via CONVERGENCE_VERDICT_VALUES in @/lib/enums). + */ +const VERDICT_BADGE = { + converged: { label: 'Converged', variant: 'success' as const }, + still_improving: { label: 'Improving', variant: 'warning' as const }, + too_few_trials: { label: 'Too few trials', variant: 'warning' as const }, +} satisfies Record; + export const studiesColumns: DataTableColumnDef[] = [ { id: 'name', @@ -109,6 +129,58 @@ export const studiesColumns: DataTableColumnDef[] = [ ); }, }, + { + id: 'trial_count', + header: 'Trials', + // tooltipKey renders an in the header (data-table.tsx:435) + // while keeping `header` a string so the column-visibility menu shows + // "Trials" (not the column id) — see data-table.tsx:316. + tooltipKey: 'study.trial_count', + accessorKey: 'trial_count', + sortable: false, + hideable: true, + cell: ({ row }) => {row.original.trial_count}, + }, + { + id: 'convergence_verdict', + header: 'Convergence', + tooltipKey: 'convergence_verdict', + accessorKey: 'convergence_verdict', + sortable: false, + hideable: true, + cell: ({ row }) => { + const verdict = row.original.convergence_verdict; + // null verdict (in-flight, invalid-direction, or < 5 complete trials) + // renders an em-dash, never a badge — matches the detail panel's + // null-state behavior. + if (verdict == null) return ; + // `verdict` is narrowed to the ConvergenceVerdict union and VERDICT_BADGE + // is `satisfies Record`, so this lookup is + // provably total + safe at compile time — not user-controlled object + // injection. + // eslint-disable-next-line security/detect-object-injection + const badge = VERDICT_BADGE[verdict]; + // Forward-compat guard (Gemini PR #438 review, accepted): convergence_verdict + // is a backend-COMPUTED classification (backend/app/domain/study/convergence.py), + // not a fixed DB-enum, so a newer backend could emit a verdict this snapshot + // doesn't map during a rolling deploy. Without this, an unmapped value would + // throw on `badge.variant` and crash the whole table render. Falls back to the + // same em-dash as the null state. Matches the StatusBadge `?? 'secondary'` + // pattern + the best_metric column's rolling-deploy backward-compat stance. + if (!badge) return ; + return ( + + + {badge.label} + + + ); + }, + }, { id: 'created_at', header: 'Created', diff --git a/ui/src/lib/glossary.ts b/ui/src/lib/glossary.ts index 81bc7fb4..572fe407 100644 --- a/ui/src/lib/glossary.ts +++ b/ui/src/lib/glossary.ts @@ -893,6 +893,17 @@ export const glossary = { ariaLabel: 'More information about the trailing-window comparison', }, + // --------------------------------------------------------------------------- + // feat_studies_convergence_visibility Story 1.2 — studies-list trial count. + // Source-of-truth: backend/app/api/v1/schemas.py StudySummary.trial_count + // (mirrors trials_summary.total — non-baseline optimization trials). + // --------------------------------------------------------------------------- + 'study.trial_count': { + short: + 'Number of optimization trials this study ran (the baseline trial is counted separately). More trials = more of the search space explored.', + ariaLabel: 'More information about the trial count', + }, + // --------------------------------------------------------------------------- // feat_auto_followup_studies Story 3.1 — 4 chain-panel + wizard entries. // Text from feature_spec.md §11 tooltip inventory. Mirrors the FR-9 diff --git a/ui/tests/e2e/studies-convergence-columns.spec.ts b/ui/tests/e2e/studies-convergence-columns.spec.ts new file mode 100644 index 00000000..f7e939ea --- /dev/null +++ b/ui/tests/e2e/studies-convergence-columns.spec.ts @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: 2026 soundminds.ai +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * E2E spec: /studies Trials + Convergence columns + * (feat_studies_convergence_visibility Story 1.2 / FR-4, AC-6). + * + * Runs against the real `make up` stack via the helpers in `./helpers/seed.ts`. + * No `page.route()` mocking per CLAUDE.md E2E rule — the columns are driven + * by the real backend's trial_count + convergence_verdict fields. + * + * A freshly seeded study runs only a handful of trials (well below the + * 50-trial warmup floor), so its convergence verdict is either + * "Too few trials" or null (em-dash) — both are valid renderings. The + * stable, deterministic assertions are therefore: + * - the "Trials" + "Convergence" column headers render; + * - the seeded study's row shows a numeric Trials cell; + * - the Convergence cell renders either a verdict badge or the em-dash + * null-state (never crashes / never blank). + */ +import { expect, test } from '@playwright/test'; + +import { seedFullChain, seedStudy } from './helpers/seed'; + +test.describe('/studies convergence columns', () => { + test('Trials + Convergence columns render against the real backend', async ({ page }) => { + const chain = await seedFullChain(2); + const study = await seedStudy({ + clusterId: chain.clusterId, + querySetId: chain.querySetId, + templateId: chain.templateId, + judgmentListId: chain.judgmentListId, + maxTrials: 3, + }); + + await page.goto('/studies'); + await expect(page.getByTestId('studies-table')).toBeVisible({ timeout: 5_000 }); + + // Both new column headers are present. + await expect( + page.getByTestId('studies-table').getByRole('columnheader', { name: /Trials/ }), + ).toBeVisible(); + await expect( + page.getByTestId('studies-table').getByRole('columnheader', { name: /Convergence/ }), + ).toBeVisible(); + + // Locate the seeded study's row and assert its Trials cell is numeric. + // Filter the list to the seeded study so its row is reliably on the first + // page regardless of how many other studies exist. + const suffix = study.name.replace(/^e2e-study-/, ''); + await page.getByTestId('data-table-search').fill(suffix); + await expect( + page.getByTestId('studies-table').getByText(study.name).first(), + ).toBeVisible({ timeout: 15_000 }); + + const row = page.locator(`[data-testid="study-row-${study.id}"]`); + await expect(row).toBeVisible(); + // The Trials cell shows a non-negative integer (tabular-nums span). + await expect(row.getByText(/^\d+$/).first()).toBeVisible(); + + // The Convergence cell renders either a verdict badge OR the em-dash + // null-state — both are correct for a low-trial study. Assert the cell + // is non-empty (not blank/undefined). + const convergenceBadge = row.locator('[data-verdict]'); + const emDash = row.getByText('—'); + await expect(convergenceBadge.or(emDash).first()).toBeVisible(); + }); +});