From a24e34ab0dbd754ac3eeb6f93baac737128b6d89 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Wed, 29 Apr 2026 15:27:03 +0200 Subject: [PATCH 1/2] feat: add UI proof validation contract --- README.md | 1 + agents/executor.md | 3 + agents/planner.md | 12 +- agents/verifier.md | 2 + bin/gsdd.mjs | 9 +- bin/lib/health.mjs | 24 +- bin/lib/init-runtime.mjs | 3 + bin/lib/lifecycle-state.mjs | 2 +- bin/lib/rendering.mjs | 5 + bin/lib/templates.mjs | 2 +- bin/lib/ui-proof.mjs | 411 ++++++++++++++++++ distilled/DESIGN.md | 46 +- distilled/EVIDENCE-INDEX.md | 7 + distilled/templates/delegates/plan-checker.md | 2 + distilled/templates/ui-proof.md | 174 ++++++++ distilled/workflows/audit-milestone.md | 1 + distilled/workflows/execute.md | 15 +- distilled/workflows/plan.md | 17 +- distilled/workflows/quick.md | 6 + distilled/workflows/verify.md | 10 +- tests/gsdd.guards.test.cjs | 80 ++++ tests/gsdd.health.test.cjs | 91 ++++ tests/gsdd.init.test.cjs | 4 + tests/phase.test.cjs | 308 +++++++++++++ 24 files changed, 1203 insertions(+), 32 deletions(-) create mode 100644 bin/lib/ui-proof.mjs create mode 100644 distilled/templates/ui-proof.md diff --git a/README.md b/README.md index 3a67b4c3..2d3ddafa 100644 --- a/README.md +++ b/README.md @@ -346,6 +346,7 @@ Workflows are agent skills or commands, not plain shell utilities. 
How you invok | `npx -y gsdd-cli init [--tools ]` | Set up `.planning/`, generate skills/adapters | | `npx -y gsdd-cli update [--tools ] [--templates]` | Regenerate skills/adapters and refresh the repo-local helper runtime; `--templates` refreshes `.planning/templates/` and role contracts | | `npx -y gsdd-cli health [--json]` | Check workspace integrity and generated-surface freshness (healthy/degraded/broken) | +| `npx -y gsdd-cli ui-proof validate [--claim ]` | Validate UI proof bundle metadata without requiring browser tooling; use `--claim` only when validating that stronger proof use | | `npx -y gsdd-cli file-op ` | Run deterministic workspace-confined file copy, delete, and regex substitution | | `npx -y gsdd-cli find-phase [N]` | Show phase info as JSON (for agent consumption) | | `npx -y gsdd-cli phase-status ` | Update a single ROADMAP phase status through the status-aware helper | diff --git a/agents/executor.md b/agents/executor.md index d3310364..1fef7997 100644 --- a/agents/executor.md +++ b/agents/executor.md @@ -209,6 +209,9 @@ Before reporting a task complete: - if a UI change is involved, verify the relevant rendering path - if an API change is involved, hit the endpoint or targeted integration path - A task is not complete because code was written. It is complete when the intended verification path actually passes. + +### UI Proof Execution +If the plan defines UI proof slots, record observed proof against the exact claim, route/state, observation, evidence kind, artifact path or manual step, privacy metadata, result, and claim limit before claiming task completion. Artifact metadata must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports are local-only/unsafe by default and cannot back public, tracked, delivery, release, or publication proof claims. Use `gsdd ui-proof validate ` or `gsdd health` when a bundle exists. 
Artifact count, source comments, AST/cAST findings, semantic search, and Semble-like retrieval are not proof. Missing or weakly linked evidence must be recorded as proof debt, waiver, deferment, or reduced claim language rather than satisfied proof. diff --git a/agents/planner.md b/agents/planner.md index 76c0e9f3..12a1eb64 100644 --- a/agents/planner.md +++ b/agents/planner.md @@ -153,6 +153,14 @@ Any checkpoint must be justified by the task itself, not by planner caution or h Any plan containing `checkpoint:*` must set `autonomous: false`. + +For UI-sensitive work, plan proof slots that can later be matched exactly to claim, route/state, observation, evidence kind, artifact path or manual step, privacy metadata, result, and claim limit. Use only the stable evidence kinds `code`, `test`, `runtime`, `delivery`, and `human`. + +Require observed artifacts to carry `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; when a planned slot is meant to support public, publication, tracked, delivery, or release proof, say to validate the observed bundle with `gsdd ui-proof validate --claim <...>`. `gsdd ui-proof validate`/`gsdd health` must catch invalid bundle metadata when present. + +Do not let source annotations, AST/cAST findings, semantic search, comments, or Semble-like retrieval satisfy proof slots; they are discovery hints only. Human acceptance can narrow or waive a claim and record proof debt, but it must not turn missing or mismatched non-human evidence into `satisfied` proof. + + Example dependency graph: @@ -184,7 +192,7 @@ Wave rule: Write one or more `PLAN.md` files to the phase directory. 
Keep the current GSDD schema exactly: -- frontmatter keys: `phase`, `plan`, `type`, `wave`, `runtime`, `assurance`, `depends_on`, `files-modified`, `autonomous`, `requirements`, `non_goals`, `hard_boundaries`, `escalation_triggers`, `approval_gates`, `anti_regression_targets`, `known_unknowns`, `high_leverage_surfaces`, `second_pass_required`, `closure_claim_limit`, `parallelism_budget`, `leverage`, `must_haves` +- frontmatter keys: `phase`, `plan`, `type`, `wave`, `runtime`, `assurance`, `depends_on`, `files-modified`, `autonomous`, `requirements`, `non_goals`, `hard_boundaries`, `escalation_triggers`, `approval_gates`, `anti_regression_targets`, `known_unknowns`, `ui_proof_slots`, `no_ui_proof_rationale`, `high_leverage_surfaces`, `second_pass_required`, `closure_claim_limit`, `parallelism_budget`, `leverage`, `must_haves` - typed tasks with `files`, `action`, `verify`, and `done` Typed frontmatter example: @@ -215,6 +223,8 @@ anti_regression_targets: - Existing session middleware behavior remains unchanged for already-supported routes. known_unknowns: - Exact copy wording for auth errors may still need product confirmation. +ui_proof_slots: [] +no_ui_proof_rationale: Not UI-sensitive; scoped work does not claim a visible UI outcome. high_leverage_surfaces: [] second_pass_required: false closure_claim_limit: Do not claim phase completion until verification satisfies the evidence contract for the scoped truths. diff --git a/agents/verifier.md b/agents/verifier.md index 72208e95..91efd167 100644 --- a/agents/verifier.md +++ b/agents/verifier.md @@ -123,6 +123,8 @@ Do not return a flat symptom list when the same underlying breakage explains mul Visual correctness, live interaction quality, and some external integrations still need explicit human checks. +For UI proof slots, fail closed unless observed proof is matched to the exact claim, route/state, observation, evidence kind, artifact path or manual step, privacy metadata, result, and claim limit. 
Artifact metadata must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; local-only or unsafe artifacts cannot back public, tracked, delivery, release, or publication proof claims, and `gsdd ui-proof validate`/`gsdd health` metadata failures block the stronger proof claim. Screenshots, traces, reports, Gherkin, a11y scans, E2E outputs, manual notes, source annotations, AST/cAST findings, semantic search, comments, and Semble-like retrieval do not satisfy proof by existence alone. Human acceptance records risk, waiver, deferment, proof debt, or a narrowed claim; it does not upgrade missing or mismatched non-human proof to `satisfied`. + ## Step 9: Determine overall status - `passed` when all programmatic checks pass and no human-only checks remain diff --git a/bin/gsdd.mjs b/bin/gsdd.mjs index 7b83aead..c1f589ab 100644 --- a/bin/gsdd.mjs +++ b/bin/gsdd.mjs @@ -19,6 +19,7 @@ import { cmdFileOp } from './lib/file-ops.mjs'; import { createCmdHealth } from './lib/health.mjs'; import { cmdLifecyclePreflight } from './lib/lifecycle-preflight.mjs'; import { cmdSessionFingerprint } from './lib/session-fingerprint.mjs'; +import { cmdUiProof } from './lib/ui-proof.mjs'; import { resolveWorkspaceContext } from './lib/workspace-root.mjs'; const __filename = fileURLToPath(import.meta.url); @@ -107,6 +108,7 @@ const COMMANDS = { 'file-op': cmdFileOp, 'lifecycle-preflight': cmdLifecyclePreflight, 'session-fingerprint': cmdSessionFingerprint, + 'ui-proof': cmdUiProof, 'find-phase': cmdFindPhase, 'phase-status': cmdPhaseStatus, verify: cmdVerify, @@ -132,8 +134,5 @@ async function runCli(cliCommand = command, ...cliArgs) { await COMMANDS[cliCommand](...normalizedArgs); } -if (IS_MAIN) { - await runCli(); -} - -export { cmdHelp, cmdInit, cmdUpdate, cmdModels, cmdHealth, cmdFileOp, cmdLifecyclePreflight, cmdSessionFingerprint, cmdFindPhase, cmdPhaseStatus, cmdVerify, cmdScaffold, runCli, FRAMEWORK_VERSION, createCliContext }; +if (IS_MAIN) await runCli(); 
+export { cmdHelp, cmdInit, cmdUpdate, cmdModels, cmdHealth, cmdFileOp, cmdLifecyclePreflight, cmdSessionFingerprint, cmdUiProof, cmdFindPhase, cmdPhaseStatus, cmdVerify, cmdScaffold, runCli, FRAMEWORK_VERSION, createCliContext }; diff --git a/bin/lib/health.mjs b/bin/lib/health.mjs index 07fbdbf7..20ff35dd 100644 --- a/bin/lib/health.mjs +++ b/bin/lib/health.mjs @@ -4,12 +4,13 @@ // evaluate once, so CWD must be computed inside function bodies. import { existsSync, readFileSync, readdirSync } from 'fs'; -import { join } from 'path'; +import { join, relative } from 'path'; import { readManifest, detectModifications } from './manifest.mjs'; import { output } from './cli-utils.mjs'; import { runTruthChecks, TRUTH_CHECK_IDS } from './health-truth.mjs'; import { evaluateLifecycleState } from './lifecycle-state.mjs'; import { evaluateRuntimeFreshness } from './runtime-freshness.mjs'; +import { findUiProofBundleFiles, readUiProofBundleFile, validateUiProofBundle } from './ui-proof.mjs'; import { resolveWorkspaceContext } from './workspace-root.mjs'; /** @@ -31,7 +32,7 @@ export function createCmdHealth(ctx) { } const cwd = workspaceRoot; const frameworkSourceMode = isFrameworkSourceRepo(cwd); - const healthCheckIds = ['E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E8', 'E9', 'W1', 'W2', 'W3', 'W4', 'W5', 'W6', ...TRUTH_CHECK_IDS, 'I1', 'I2', 'I3']; + const healthCheckIds = ['E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E8', 'E9', 'E10', 'W1', 'W2', 'W3', 'W4', 'W5', 'W6', ...TRUTH_CHECK_IDS, 'I1', 'I2', 'I3']; // Pre-init guard if (!existsSync(join(planningDir, 'config.json'))) { @@ -123,7 +124,7 @@ export function createCmdHealth(ctx) { } // E8: critical root template files missing - const requiredRootFiles = ['spec.md', 'roadmap.md', 'auth-matrix.md']; + const requiredRootFiles = ['spec.md', 'roadmap.md', 'auth-matrix.md', 'ui-proof.md']; const missingRoot = requiredRootFiles.filter((f) => !existsSync(join(templatesDir, f))); if (missingRoot.length > 0) { errors.push({ id: 
'E8', severity: 'ERROR', message: `.planning/templates/ missing critical root files: ${missingRoot.join(', ')}`, fix: 'Run `npx -y gsdd-cli update --templates`' }); @@ -140,6 +141,23 @@ export function createCmdHealth(ctx) { } } + // E10: known UI proof bundles must satisfy deterministic metadata/privacy validation. + for (const bundlePath of findUiProofBundleFiles(planningDir)) { + const relativePath = relative(cwd, bundlePath).replace(/\\/g, '/'); + const parsed = readUiProofBundleFile(bundlePath); + const validation = parsed.errors.length > 0 + ? { valid: false, errors: parsed.errors } + : validateUiProofBundle(parsed.bundle); + if (!validation.valid) { + errors.push({ + id: 'E10', + severity: 'ERROR', + message: `${relativePath} has invalid UI proof metadata (${validation.errors.map((entry) => entry.code).join(', ')})`, + fix: 'Run `gsdd ui-proof validate ` and add required privacy metadata, claim limits, fixed evidence kinds, observation artifact references, and safe-to-publish handling.', + }); + } + } + // --- WARNING checks --- // W1: generation-manifest.json missing diff --git a/bin/lib/init-runtime.mjs b/bin/lib/init-runtime.mjs index 1a8034d1..e0b47636 100644 --- a/bin/lib/init-runtime.mjs +++ b/bin/lib/init-runtime.mjs @@ -187,6 +187,8 @@ Commands: lifecycle-preflight [phase] Inspect deterministic lifecycle gate results for a workflow surface session-fingerprint write Rebaseline planning-state drift after reviewing changed planning files + ui-proof validate [--claim ] + Validate UI proof metadata; use --claim for stronger proof uses help Show this summary Platforms (for --tools): @@ -257,6 +259,7 @@ Advanced/internal helpers (kept available, but not the primary first-run user st lifecycle-preflight Inspect deterministic lifecycle gate results for a workflow surface session-fingerprint Rebaseline the local planning-state fingerprint after review phase-status Update ROADMAP.md phase status through the local helper surface + ui-proof Validate UI proof 
metadata; use --claim for stronger proof uses file-op Deterministic workspace-confined file copy/delete/text mutation `; } diff --git a/bin/lib/lifecycle-state.mjs b/bin/lib/lifecycle-state.mjs index eead8d23..8039adf3 100644 --- a/bin/lib/lifecycle-state.mjs +++ b/bin/lib/lifecycle-state.mjs @@ -6,7 +6,7 @@ const BROWNFIELD_CHANGE_DIR = 'brownfield-change'; const PHASE_LINE_RE = /^\s*[-*]\s*\[([ x-])\]\s*\*\*Phase\s+(\d+(?:\.\d+)*[a-z]?):\s*(.+?)\*\*(?:\s+—\s+\[([^\]]+)])?/i; const PHASE_DETAIL_HEADING_RE = /^(#{3,})\s+Phase\s+(\d+(?:\.\d+)*[a-z]?)(?::|\b)/i; const PHASE_DETAIL_STATUS_RE = /^\s*\*\*Status\*\*:\s*\[([ x-])\]/i; -const ACTIVE_MILESTONE_HEADING_RE = /^###\s+(v[^\s]+)\s+(.+)$/im; +const ACTIVE_MILESTONE_HEADING_RE = /^#{2,3}\s+(v[^\s]+)\s+(.+)$/im; const MILESTONE_LEDGER_HEADING_RE = /^##\s+(?:✅\s+)?(v[^\s]+)\s*(?:—|-)?\s*(.*)$/i; const DETAILS_OPEN_RE = //i; diff --git a/bin/lib/rendering.mjs b/bin/lib/rendering.mjs index b0e047be..49212513 100644 --- a/bin/lib/rendering.mjs +++ b/bin/lib/rendering.mjs @@ -13,6 +13,7 @@ const HELPER_LIB_FILES = Object.freeze([ 'lifecycle-state.mjs', 'phase.mjs', 'session-fingerprint.mjs', + 'ui-proof.mjs', 'workspace-root.mjs', ]); @@ -47,6 +48,7 @@ import { cmdFileOp } from './lib/file-ops.mjs'; import { cmdLifecyclePreflight } from './lib/lifecycle-preflight.mjs'; import { cmdPhaseStatus } from './lib/phase.mjs'; import { cmdSessionFingerprint } from './lib/session-fingerprint.mjs'; +import { cmdUiProof } from './lib/ui-proof.mjs'; import { bootstrapHelperWorkspace, consumeWorkspaceRootArg, resolveWorkspaceContext } from './lib/workspace-root.mjs'; const COMMANDS = { @@ -54,6 +56,7 @@ const COMMANDS = { 'lifecycle-preflight': cmdLifecyclePreflight, 'phase-status': cmdPhaseStatus, 'session-fingerprint': cmdSessionFingerprint, + 'ui-proof': cmdUiProof, }; function printHelp() { @@ -71,6 +74,8 @@ function printHelp() { ' Example: node .planning/bin/gsdd.mjs lifecycle-preflight verify 1 --expects-mutation 
phase-status', ' session-fingerprint write', ' Rebaseline planning-state drift after reviewing changed planning files', + ' ui-proof validate [--claim ]', + ' Validate UI proof metadata; use --claim for stronger proof uses', '', 'Advanced option:', ' --workspace-root Override workspace root discovery before or after the subcommand', diff --git a/bin/lib/templates.mjs b/bin/lib/templates.mjs index 113e2c43..d5e08550 100644 --- a/bin/lib/templates.mjs +++ b/bin/lib/templates.mjs @@ -23,7 +23,7 @@ export function installProjectTemplates({ planningDir, distilledDir, agentsDir } console.log(` - WARN: missing expected template subdir: ${subdir}/`); } } - const expectedRootFiles = ['spec.md', 'roadmap.md', 'auth-matrix.md']; + const expectedRootFiles = ['spec.md', 'roadmap.md', 'auth-matrix.md', 'ui-proof.md']; for (const file of expectedRootFiles) { if (!existsSync(join(localTemplatesDir, file))) { console.log(` - WARN: missing expected root template file: ${file}`); diff --git a/bin/lib/ui-proof.mjs b/bin/lib/ui-proof.mjs new file mode 100644 index 00000000..9d6e2c78 --- /dev/null +++ b/bin/lib/ui-proof.mjs @@ -0,0 +1,411 @@ +import { existsSync, readFileSync, readdirSync, statSync } from 'fs'; +import { isAbsolute, join, relative, resolve } from 'path'; +import { output } from './cli-utils.mjs'; +import { resolveWorkspaceContext } from './workspace-root.mjs'; + +const EVIDENCE_KINDS = Object.freeze(['code', 'test', 'runtime', 'delivery', 'human']); +const COMPARISON_STATUSES = Object.freeze(['satisfied', 'partial', 'missing', 'waived', 'deferred', 'not_applicable']); +const CLAIM_STATUSES = Object.freeze(['passed', 'failed', 'partial', 'waived', 'deferred', 'not_applicable']); +const ARTIFACT_VISIBILITIES = Object.freeze(['local_only', 'repo_tracked', 'public']); +const RAW_ARTIFACT_TYPES = Object.freeze(['screenshot', 'trace', 'video', 'dom_snapshot', 'dom-snapshot', 'dom', 'report']); +const PUBLIC_CLAIM_USES = Object.freeze(['public', 'publication', 'tracked', 
'delivery', 'release']); +const CLAIM_USES = Object.freeze([...PUBLIC_CLAIM_USES, 'local', 'local_only']); +const REQUIRED_BUNDLE_FIELDS = Object.freeze([ + 'proof_bundle_version', + 'scope', + 'route_state', + 'environment', + 'viewport', + 'evidence_inputs', + 'commands_or_manual_steps', + 'observations', + 'artifacts', + 'privacy', + 'result', + 'claim_limits', +]); +const REQUIRED_SCOPE_FIELDS = Object.freeze(['work_item', 'claim', 'requirement_ids', 'slot_ids']); +const REQUIRED_ARTIFACT_FIELDS = Object.freeze(['visibility', 'retention', 'sensitivity', 'safe_to_publish']); +const REQUIRED_OBSERVATION_FIELDS = Object.freeze(['observation', 'claim', 'route_state', 'evidence_kind', 'artifact_refs', 'privacy', 'result', 'claim_limit']); +const REQUIRED_PRIVACY_FIELDS = Object.freeze(['data_classification', 'raw_artifacts_safe_to_publish', 'retention']); + +class UiProofError extends Error {} + +function fail(message) { + console.error(message); + throw new UiProofError(message); +} + +function isPlainObject(value) { + return value !== null && typeof value === 'object' && !Array.isArray(value); +} + +function hasValue(value) { + if (value === undefined || value === null) return false; + if (typeof value === 'string') return value.trim() !== ''; + if (Array.isArray(value)) return value.length > 0; + if (isPlainObject(value)) return Object.keys(value).length > 0; + return true; +} + +function pathLabel(basePath, key) { + return basePath ? 
`${basePath}.${key}` : key; +} + +function addError(errors, code, path, message, fix) { + errors.push({ code, path, message, fix }); +} + +function requireField(obj, field, path, errors) { + if (!isPlainObject(obj) || !hasValue(obj[field])) { + addError(errors, 'missing_required_field', pathLabel(path, field), `Missing required UI proof field: ${pathLabel(path, field)}`, 'Add the required field to the proof bundle metadata.'); + return false; + } + return true; +} + +function normalizeArray(value) { + if (Array.isArray(value)) return value; + if (typeof value === 'string' && value.trim()) return [value.trim()]; + return []; +} + +function artifactType(artifact) { + const explicit = typeof artifact.type === 'string' ? artifact.type.toLowerCase() : ''; + if (explicit) return explicit; + const artifactPath = typeof artifact.path === 'string' ? artifact.path.toLowerCase() : ''; + if (/screenshot|\.png$|\.jpe?g$|\.webp$/.test(artifactPath)) return 'screenshot'; + if (/trace|\.zip$/.test(artifactPath)) return 'trace'; + if (/video|\.mp4$|\.webm$|\.mov$/.test(artifactPath)) return 'video'; + if (/dom|\.html?$/.test(artifactPath)) return 'dom_snapshot'; + if (/report/.test(artifactPath)) return 'report'; + return explicit; +} + +function isRawUiArtifact(artifact) { + return RAW_ARTIFACT_TYPES.includes(artifactType(artifact)); +} + +function collectClaimUses(bundle, options) { + const uses = new Set(); + for (const value of normalizeArray(options.claimUse).concat(normalizeArray(options.claimUses))) { + uses.add(String(value).toLowerCase()); + } + + const explicitSources = [ + bundle?.proof_claim, + bundle?.proof_claims, + bundle?.claim_context?.proof_use, + bundle?.claim_context?.proof_uses, + bundle?.publication?.intended_use, + ]; + for (const source of explicitSources) { + for (const value of normalizeArray(source)) uses.add(String(value).toLowerCase()); + } + + return [...uses]; +} + +function validateClaimUses(bundle, options, errors) { + for (const value of 
collectClaimUses(bundle, options)) { + if (!CLAIM_USES.includes(value)) { + addError(errors, 'unsupported_claim_use', 'proof_claim', `Unsupported UI proof claim use: ${value}`, `Use only: ${CLAIM_USES.join(', ')}.`); + } + } +} + +function hasPublicClaim(bundle, options) { + return collectClaimUses(bundle, options).some((value) => PUBLIC_CLAIM_USES.includes(value)); +} + +function validateObservationPrivacy(privacy, path, errors) { + for (const field of REQUIRED_PRIVACY_FIELDS) requireField(privacy, field, path, errors); + if (hasValue(privacy?.raw_artifacts_safe_to_publish) && typeof privacy.raw_artifacts_safe_to_publish !== 'boolean') { + addError(errors, 'invalid_raw_artifacts_safe_to_publish', `${path}.raw_artifacts_safe_to_publish`, 'raw_artifacts_safe_to_publish must be a boolean.', 'Use false unless all raw artifacts are explicitly safe to publish.'); + } +} + +function validateObservations(bundle, errors) { + for (const [index, observation] of normalizeArray(bundle?.observations).entries()) { + if (!isPlainObject(observation)) continue; + const observationPath = `observations[${index}]`; + for (const field of REQUIRED_OBSERVATION_FIELDS) requireField(observation, field, observationPath, errors); + if (hasValue(observation.evidence_kind) && !EVIDENCE_KINDS.includes(observation.evidence_kind)) { + addError(errors, 'unsupported_evidence_kind', `${observationPath}.evidence_kind`, `Unsupported UI proof observation evidence kind: ${observation.evidence_kind}`, `Use only: ${EVIDENCE_KINDS.join(', ')}.`); + } + if (hasValue(observation.result) && !CLAIM_STATUSES.includes(observation.result)) { + addError(errors, 'invalid_observation_result', `${observationPath}.result`, `Invalid UI proof observation result: ${observation.result}`, `Use only: ${CLAIM_STATUSES.join(', ')}.`); + } + validateObservationPrivacy(observation.privacy, `${observationPath}.privacy`, errors); + } +} + +function validateEvidenceKinds(bundle, errors) { + const kinds = 
normalizeArray(bundle?.evidence_inputs?.kinds); + if (kinds.length === 0) { + addError(errors, 'missing_evidence_kinds', 'evidence_inputs.kinds', 'Missing UI proof evidence kinds.', 'Record at least one fixed evidence kind: code, test, runtime, delivery, or human.'); + } + for (const [index, kind] of kinds.entries()) { + if (!EVIDENCE_KINDS.includes(kind)) { + addError(errors, 'unsupported_evidence_kind', `evidence_inputs.kinds[${index}]`, `Unsupported UI proof evidence kind: ${kind}`, `Use only: ${EVIDENCE_KINDS.join(', ')}.`); + } + } +} + +function validateResult(bundle, errors) { + if (!isPlainObject(bundle?.result)) return; + if (!hasValue(bundle.result.claim_status)) { + addError(errors, 'missing_claim_status', 'result.claim_status', 'Missing UI proof result claim status.', `Record claim_status using: ${CLAIM_STATUSES.join(', ')}.`); + } else if (!CLAIM_STATUSES.includes(bundle.result.claim_status)) { + addError(errors, 'invalid_claim_status', 'result.claim_status', `Invalid UI proof claim status: ${bundle.result.claim_status}`, `Use only: ${CLAIM_STATUSES.join(', ')}.`); + } +} + +function validateComparisonStatuses(bundle, errors) { + const statuses = bundle?.result?.comparison_status_by_slot; + if (!isPlainObject(statuses)) { + addError(errors, 'missing_comparison_statuses', 'result.comparison_status_by_slot', 'Missing UI proof comparison statuses by slot.', `Record one status per slot using: ${COMPARISON_STATUSES.join(', ')}.`); + return; + } + const slotIds = normalizeArray(bundle?.scope?.slot_ids); + const slotSet = new Set(slotIds); + for (const slotId of slotIds) { + if (!hasValue(statuses[slotId])) { + addError(errors, 'missing_comparison_status', `result.comparison_status_by_slot.${slotId}`, `Missing UI proof comparison status for slot: ${slotId}`, `Record one status per slot using: ${COMPARISON_STATUSES.join(', ')}.`); + } + } + for (const [slot, status] of Object.entries(statuses)) { + if (slotSet.size > 0 && !slotSet.has(slot)) { + 
addError(errors, 'unknown_comparison_slot', `result.comparison_status_by_slot.${slot}`, `UI proof comparison status references undeclared slot: ${slot}`, 'Use only slot IDs declared in scope.slot_ids.'); + } + if (!COMPARISON_STATUSES.includes(status)) { + addError(errors, 'invalid_comparison_status', `result.comparison_status_by_slot.${slot}`, `Invalid UI proof comparison status: ${status}`, `Use only: ${COMPARISON_STATUSES.join(', ')}.`); + } + } +} + +function validateClaimLimits(bundle, errors) { + const claimLimits = normalizeArray(bundle?.claim_limits); + if (claimLimits.length === 0) { + addError(errors, 'missing_claim_limits', 'claim_limits', 'Missing UI proof claim limits.', 'Add at least one claim limit that narrows what this proof does not prove.'); + } +} + +function artifactReference(artifact) { + if (!isPlainObject(artifact)) return null; + if (typeof artifact.path === 'string' && artifact.path.trim()) return artifact.path.trim(); + if (typeof artifact.url === 'string' && artifact.url.trim()) return artifact.url.trim(); + return null; +} + +function validateArtifacts(bundle, errors, publicClaim) { + const artifacts = normalizeArray(bundle?.artifacts); + if (artifacts.length === 0) { + addError(errors, 'missing_artifacts', 'artifacts', 'Missing UI proof artifacts list.', 'Record artifact metadata for each referenced proof artifact.'); + return new Set(); + } + + const artifactRefs = new Set(); + for (const [index, artifact] of artifacts.entries()) { + const artifactPath = `artifacts[${index}]`; + if (!isPlainObject(artifact)) { + addError(errors, 'invalid_artifact', artifactPath, 'UI proof artifact entry must be an object.', 'Record path/type plus privacy metadata for each artifact.'); + continue; + } + const ref = artifactReference(artifact); + if (!ref) { + addError(errors, 'missing_artifact_ref', artifactPath, 'UI proof artifact must include path or url.', 'Reference raw UI artifacts by path or URL; do not inline them.'); + } else { + 
artifactRefs.add(ref); + } + for (const field of REQUIRED_ARTIFACT_FIELDS) { + requireField(artifact, field, artifactPath, errors); + } + if (hasValue(artifact.visibility) && !ARTIFACT_VISIBILITIES.includes(artifact.visibility)) { + addError(errors, 'invalid_visibility', `${artifactPath}.visibility`, `Invalid UI proof artifact visibility: ${artifact.visibility}`, `Use only: ${ARTIFACT_VISIBILITIES.join(', ')}.`); + } + if (hasValue(artifact.safe_to_publish) && typeof artifact.safe_to_publish !== 'boolean') { + addError(errors, 'invalid_safe_to_publish', `${artifactPath}.safe_to_publish`, 'safe_to_publish must be a boolean.', 'Use true only after explicit safe-to-publish classification; otherwise use false.'); + } + if (isRawUiArtifact(artifact) && artifact.visibility !== 'local_only' && artifact.safe_to_publish !== true) { + addError(errors, 'unsafe_raw_artifact', artifactPath, 'Raw UI artifacts are local-only by default unless explicitly classified safe to publish.', 'Set visibility: local_only and safe_to_publish: false, or document sanitized public-safe classification.'); + } + if (publicClaim && (artifact.visibility === 'local_only' || artifact.safe_to_publish !== true)) { + addError(errors, 'unsafe_public_proof_claim', artifactPath, 'Public/tracked/delivery UI proof claims cannot rely on local-only or unsafe artifacts.', 'Use local-only claim language, or provide sanitized artifacts with safe_to_publish: true and non-local visibility.'); + } + } + return artifactRefs; +} + +function validatePrivacy(bundle, errors) { + validateObservationPrivacy(bundle.privacy, 'privacy', errors); +} + +function validateObservationArtifactRefs(bundle, artifactRefs, errors) { + for (const [index, observation] of normalizeArray(bundle?.observations).entries()) { + if (!isPlainObject(observation)) continue; + for (const [refIndex, ref] of normalizeArray(observation.artifact_refs).entries()) { + if (!artifactRefs.has(ref)) { + addError(errors, 'unknown_artifact_ref', 
`observations[${index}].artifact_refs[${refIndex}]`, `Observation references undeclared UI proof artifact: ${ref}`, 'Add the artifact to artifacts[] or correct the observation artifact reference.'); + } + } + } +} + +export function validateUiProofBundle(bundle, options = {}) { + const errors = []; + const warnings = []; + + if (!isPlainObject(bundle)) { + addError(errors, 'invalid_bundle', '', 'UI proof bundle must be an object.', 'Provide structured UI proof metadata.'); + return { valid: false, errors, warnings }; + } + + for (const field of REQUIRED_BUNDLE_FIELDS) requireField(bundle, field, '', errors); + for (const field of REQUIRED_SCOPE_FIELDS) requireField(bundle.scope, field, 'scope', errors); + validateClaimUses(bundle, options, errors); + validateEvidenceKinds(bundle, errors); + validateObservations(bundle, errors); + validateResult(bundle, errors); + validateComparisonStatuses(bundle, errors); + validateClaimLimits(bundle, errors); + validatePrivacy(bundle, errors); + const artifactRefs = validateArtifacts(bundle, errors, hasPublicClaim(bundle, options)); + validateObservationArtifactRefs(bundle, artifactRefs, errors); + + return { valid: errors.length === 0, errors, warnings }; +} + +export function parseUiProofBundleContent(content, filePath = 'UI proof bundle') { + const trimmed = content.trim(); + if (!trimmed) { + return { bundle: null, errors: [{ code: 'empty_bundle_file', path: filePath, message: 'UI proof bundle file is empty.', fix: 'Write JSON UI proof metadata before validating.' }] }; + } + + const jsonCandidates = [trimmed]; + const fenceMatches = [...trimmed.matchAll(/```(?:json|ui-proof-json)?\s*([\s\S]*?)```/gi)]; + for (const match of fenceMatches) jsonCandidates.push(match[1].trim()); + + for (const candidate of jsonCandidates) { + try { + return { bundle: JSON.parse(candidate), errors: [] }; + } catch { + // Try next candidate; final error is reported below. 
+ } + } + + return { + bundle: null, + errors: [{ code: 'unparseable_bundle', path: filePath, message: 'UI proof bundle metadata is not valid JSON.', fix: 'Use a .json proof bundle or a markdown fenced JSON block; no YAML parser dependency is installed.' }], + }; +} + +export function readUiProofBundleFile(filePath) { + return parseUiProofBundleContent(readFileSync(filePath, 'utf-8'), filePath); +} + +function walkForUiProofFiles(dir, results) { + if (!existsSync(dir)) return; + for (const entry of readdirSync(dir)) { + const fullPath = join(dir, entry); + const stat = statSync(fullPath); + if (stat.isDirectory()) { + walkForUiProofFiles(fullPath, results); + continue; + } + const name = entry.toLowerCase(); + if (['ui-proof.json', 'ui-proof.md', 'proof-bundle.json'].includes(name)) { + results.add(fullPath); + } + } +} + +export function findUiProofBundleFiles(planningDir) { + const results = new Set(); + for (const relativePath of [ + 'UI-PROOF.json', + 'ui-proof.json', + 'ui-proof.md', + 'ui-proof/UI-PROOF.json', + 'ui-proof/proof-bundle.json', + 'brownfield-change/UI-PROOF.json', + ]) { + const fullPath = join(planningDir, relativePath); + if (existsSync(fullPath)) results.add(fullPath); + } + for (const relativeDir of ['phases', 'quick', 'brownfield-change']) { + walkForUiProofFiles(join(planningDir, relativeDir), results); + } + return [...results].sort(); +} + +function resolveWorkspacePath(cwd, target) { + const workspaceRoot = resolve(cwd); + const resolved = resolve(workspaceRoot, target); + const rel = relative(workspaceRoot, resolved); + if (rel === '' || (!rel.startsWith('..') && !isAbsolute(rel))) return resolved; + fail(`Path must stay inside the workspace: ${target}`); +} + +function parseClaimUse(args) { + const values = []; + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]; + if (arg !== '--claim') fail('Usage: gsdd ui-proof validate [--claim ]'); + const value = args[index + 1]; + if (!value || 
value.startsWith('--')) fail('Usage: gsdd ui-proof validate [--claim ]'); + values.push(...value.split(',').map((entry) => entry.trim()).filter(Boolean)); + index += 1; + } + for (const value of values) { + if (!PUBLIC_CLAIM_USES.includes(value)) fail(`Unsupported UI proof claim use: ${value}`); + } + return values; +} + +function cmdValidate(cwd, args) { + const [targetArg, ...flags] = args; + if (!targetArg) fail('Usage: gsdd ui-proof validate [--claim ]'); + const target = resolveWorkspacePath(cwd, targetArg); + if (!existsSync(target) || statSync(target).isDirectory()) fail(`UI proof bundle file does not exist: ${targetArg}`); + + const parsed = readUiProofBundleFile(target); + const validation = parsed.errors.length > 0 + ? { valid: false, errors: parsed.errors, warnings: [] } + : validateUiProofBundle(parsed.bundle, { claimUses: parseClaimUse(flags) }); + + output({ operation: 'ui-proof validate', target: targetArg, valid: validation.valid, errors: validation.errors, warnings: validation.warnings }); + if (!validation.valid) process.exitCode = 1; +} + +export function cmdUiProof(...args) { + const { args: normalizedArgs, workspaceRoot, invalid, error } = resolveWorkspaceContext(args); + if (invalid) { + console.error(error); + process.exitCode = 1; + return; + } + const [operation, ...rest] = normalizedArgs; + try { + switch (operation) { + case 'validate': + cmdValidate(workspaceRoot, rest); + return; + default: + fail('Usage: gsdd ui-proof validate [--claim ]'); + } + } catch (error) { + if (error instanceof UiProofError) { + process.exitCode = 1; + return; + } + throw error; + } +} + +export { + ARTIFACT_VISIBILITIES as UI_PROOF_ARTIFACT_VISIBILITIES, + COMPARISON_STATUSES as UI_PROOF_COMPARISON_STATUSES, + EVIDENCE_KINDS as UI_PROOF_EVIDENCE_KINDS, + RAW_ARTIFACT_TYPES as UI_PROOF_RAW_ARTIFACT_TYPES, +}; diff --git a/distilled/DESIGN.md b/distilled/DESIGN.md index 2a023dfa..744810f1 100644 --- a/distilled/DESIGN.md +++ b/distilled/DESIGN.md @@ -72,6 +72,7 
@@ 59. [Continuity Authority And Planning-State Drift](#d59---continuity-authority-and-planning-state-drift) 60. [Release Closeout Contract](#d60---release-closeout-contract) 61. [Deliberate Subagent Contract](#d61---deliberate-subagent-contract) +62. [Repo-Native UI Proof Contract](#d62---repo-native-ui-proof-contract) --- @@ -956,8 +957,9 @@ Implementation lives under `bin/lib/`: | E5 | ERROR | `.planning/templates/delegates/` missing or empty | | E6 | ERROR | `.planning/templates/research/` missing or empty | | E7 | ERROR | `.planning/templates/codebase/` missing or empty | -| E8 | ERROR | `.planning/templates/` missing critical root files (`spec.md`, `roadmap.md`, `auth-matrix.md`) | +| E8 | ERROR | `.planning/templates/` missing critical root files (`spec.md`, `roadmap.md`, `auth-matrix.md`, `ui-proof.md`) | | E9 | ERROR | `.planning/templates/brownfield-change/` missing or missing critical files (`CHANGE.md`, `HANDOFF.md`, `VERIFICATION.md`) | +| E10 | ERROR | Known UI proof bundle metadata is unparseable or fails deterministic privacy/claim validation | | W1 | WARN | `generation-manifest.json` missing | | W2 | WARN | Manifest-tracked installed templates/helpers modified locally (hash mismatch vs manifest) | | W3 | WARN | Manifest-tracked installed templates/helpers missing from disk but listed in manifest | @@ -2813,6 +2815,48 @@ Posture compatibility is part of that closeout contract: `repo_closeout` and `ru --- +## D62 - Repo-Native UI Proof Contract + +**Decision (2026-04-28):** UI-sensitive work should carry a compact planned proof-slot contract and, when executed, an observed UI proof bundle that references artifacts by path or link while preserving the existing closure evidence kinds: `code`, `test`, `runtime`, `delivery`, and `human`. + +**Context:** +- UI proof targets the recurring failure mode where agents claim a UI works or looks good without rendered proof, matched observations, or explicit human judgment. 
+- The contract defines proof slots, proof bundles, comparison statuses, fail-closed agent guardrails, deterministic metadata validation, privacy metadata, and health visibility without adding a browser-provider framework. +- GSD's archived planner, executor, and verifier roles preserve strong lifecycle discipline, but they do not provide this UI-specific planned-vs-observed proof model. GSDD keeps the lifecycle leverage and adds a repo-native UI proof substrate without adding a browser-provider framework. + +**Decision:** +- Planning must classify UI-sensitive work and require either `ui_proof_slots` or an explicit `no_ui_proof_rationale`. +- Planned slots record claim, route/state, required evidence kinds, minimum observations, environment/viewport, manual-acceptance requirement, claim limit, and requirement IDs. +- Observed proof bundles record claim, requirement/slot IDs, route/state, environment, viewport, evidence inputs, commands/manual steps, observations, artifacts, privacy metadata, result, and claim limits. +- Verification compares planned slots to observed bundles using `satisfied`, `partial`, `missing`, `waived`, `deferred`, and `not_applicable`; waiver and deferral are not proof. +- UI correctness claims fail closed unless rendered proof is matched exactly to claim, route/state, observation, evidence kind, artifact path or manual step, privacy metadata, result, and claim limit, or an explicit waiver/deferment narrows the claim. +- Human acceptance may close a narrowed claim and record proof debt, but it must not convert missing or mismatched non-human evidence into `satisfied` proof. +- Screenshots, traces, videos, reports, accessibility scans, Gherkin, and visual diffs are artifact types or activities mapped onto the five existing evidence kinds, not new evidence kinds. 
+- Source annotations, AST/cAST findings, semantic search hits, comments, and Semble-like retrieval may discover proof obligations, but they are discovery hints only and do not satisfy proof slots. +- Visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication require human evidence or explicit waiver, and human approval does not replace required `code`, `test`, `runtime`, or `delivery` evidence. +- Deterministic metadata enforcement keeps the evidence and comparison-status vocabularies unchanged: artifact entries require `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to `local_only` plus `safe_to_publish: false`; `bin/lib/ui-proof.mjs` validates required bundle/observation fields, fixed evidence kinds, claim/result statuses, comparison statuses, claim limits, privacy metadata, and public/tracked/delivery proof claims backed by local-only or unsafe artifacts. +- `gsdd health` reports invalid known UI proof bundles as E10 using the same validator, staying read-only and metadata-only. + +**Leverage:** +- Lost: UI-sensitive work now carries a small proof-contract burden, and invalid proof metadata can degrade/break health before agents can claim rendered UI outcomes. +- Kept: repo-native markdown artifacts, optional project tooling, fixed closure evidence kinds, generated-surface freshness, and the plan/execute/verify separation. +- Gained: exact claim-to-proof traceability, strict comparison statuses, privacy and claim-limit metadata, fail-closed overclaim guardrails, deterministic metadata validation, and health-visible protection against unsafe public proof claims. 
+ +**Evidence:** +- `distilled/templates/ui-proof.md` +- `distilled/workflows/plan.md`, `distilled/workflows/execute.md`, `distilled/workflows/quick.md`, `distilled/workflows/verify.md` +- `agents/planner.md`, `agents/executor.md`, `agents/verifier.md`, `distilled/templates/delegates/plan-checker.md` +- `bin/lib/templates.mjs`, `bin/lib/ui-proof.mjs`, `bin/lib/health.mjs`, `bin/lib/rendering.mjs` +- `tests/phase.test.cjs`, `tests/gsdd.guards.test.cjs`, `tests/gsdd.health.test.cjs`, `tests/gsdd.init.test.cjs` +- GSD comparison: the upstream planner, executor, and verifier role patterns preserve lifecycle rigor, but they do not define UI proof slots or planned-vs-observed UI proof bundles. + +**Consequences:** +- Future UI-related phases must not add new evidence kinds by treating artifact types as proof categories. +- Future dogfood or runtime validation must not upgrade artifact counts or human waivers into proof. +- Generated runtime surfaces and local templates must stay freshness-checkable through `gsdd update --templates` and health diagnostics. 
+ +--- + ## Maintenance This document is updated when: diff --git a/distilled/EVIDENCE-INDEX.md b/distilled/EVIDENCE-INDEX.md index 6409dae9..42a3c05c 100644 --- a/distilled/EVIDENCE-INDEX.md +++ b/distilled/EVIDENCE-INDEX.md @@ -485,6 +485,13 @@ - GSD comparison source: `get-shit-done/workflows/new-project.md` - `tests/gsdd.guards.test.cjs`, `tests/gsdd.invariants.test.cjs` +## D62 — Repo-Native UI Proof Contract +- `distilled/templates/ui-proof.md` +- `distilled/workflows/plan.md`, `distilled/workflows/execute.md`, `distilled/workflows/quick.md`, `distilled/workflows/verify.md` +- `agents/planner.md`, `agents/executor.md`, `agents/verifier.md`, `distilled/templates/delegates/plan-checker.md` +- `bin/lib/templates.mjs`, `bin/lib/ui-proof.mjs`, `bin/lib/health.mjs`, `bin/lib/rendering.mjs` +- `tests/phase.test.cjs`, `tests/gsdd.guards.test.cjs`, `tests/gsdd.health.test.cjs`, `tests/gsdd.init.test.cjs` + --- ## Maintenance diff --git a/distilled/templates/delegates/plan-checker.md b/distilled/templates/delegates/plan-checker.md index e1172b70..29625a88 100644 --- a/distilled/templates/delegates/plan-checker.md +++ b/distilled/templates/delegates/plan-checker.md @@ -34,6 +34,8 @@ Verify these dimensions: - `anti_regression_capture`: known prior failures, compatibility risks, and behavior that must not regress are represented in tasks or verification. - `escalation_integrity`: tasks include checkpoints or escalation when evidence, permissions, user decisions, or risky ambiguity are required. - `closure_honesty`: the plan's done criteria and evidence limits support only claims that execution can actually prove. +- `closure_honesty`: for UI proof, reject agent-only `looks good` closure, artifact-count proof, unsupported evidence kinds, and human acceptance that converts missing/mismatched non-human evidence into `satisfied` proof. Waiver, deferment, proof debt, or narrowed-claim language is acceptable only when the stronger UI claim is not treated as proven. 
+- `closure_honesty`: for UI proof privacy, require artifact `visibility`, `retention`, `sensitivity`, and `safe_to_publish`, require `gsdd ui-proof validate` or `gsdd health` when bundle metadata exists, and reject public/tracked/delivery/publication proof claims backed by local-only or `safe_to_publish: false` artifacts. - `high_leverage_review`: high-leverage surfaces have a second-pass review or equivalent contradiction/staleness check before completion. - `approach_alignment`: when APPROACH.md is provided, verify that plan tasks implement the chosen approaches from the user's decisions. Check: - **Alignment proof valid?** When `workflow.discuss` is `true`, APPROACH.md must record `alignment_status: user_confirmed` or `alignment_status: approved_skip`. Missing alignment proof, unknown status, or agent-discretion-only proof -> `blocker` with `fix_hint` telling the planner to revise APPROACH.md through real user alignment or an explicit user-approved skip. diff --git a/distilled/templates/ui-proof.md b/distilled/templates/ui-proof.md new file mode 100644 index 00000000..d7d7ef55 --- /dev/null +++ b/distilled/templates/ui-proof.md @@ -0,0 +1,174 @@ +# UI Proof Bundle Template + +Use this template when work affects rendered UI or when a plan defines `ui_proof_slots`. Keep the bundle compact, claim-specific, and attached to the relevant phase, quick task, or brownfield change. + +UI proof uses the existing closure evidence kinds only: `code`, `test`, `runtime`, `delivery`, and `human`. Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes are artifact types or activities that map onto those evidence kinds. They are not new evidence kinds. + +## Planned Proof Slots + +Every UI-sensitive plan needs either at least one slot under `ui_proof_slots` or an explicit `no_ui_proof_rationale` explaining why no rendered UI proof is required. 
+ +```yaml +ui_proof_slots: + - slot_id: ui-01 + requirement_id: REQ-01 + claim: "User can complete the changed flow without a broken rendered UI." + route_state: "/example route, role, data state, and UI state to inspect" + required_evidence_kinds: [test, runtime] + optional_evidence_kinds: [human] + minimum_observations: + - "Changed control is visible and usable in the stated state." + - "Expected interaction completes without console/runtime error." + environment: + app_url: "http://localhost:3000" + data_state: "synthetic or seeded data" + viewport: + width: 1280 + height: 720 + notes: "Use project default unless responsive behavior is part of the claim." + manual_acceptance_required: false + claim_limit: "Does not prove cross-browser layout, full accessibility conformance, production delivery, or unrelated UI states." +no_ui_proof_rationale: null +``` + +Slot rules: +- Keep each slot tied to one exact UI claim. +- Use the lightest proof that can catch a botched rendered experience for that claim. +- Source annotations, AST/cAST findings, semantic search hits, comments, and Semble-like retrieval may help discover proof obligations. They are discovery hints only; they do not satisfy proof slots. +- Do not add Playwright, Cypress, Storybook, Cucumber, CI, browser MCP, or visual-regression tooling by default. +- Human approval is required for visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication decisions. +- Human approval does not replace required non-human evidence when the slot requires `code`, `test`, `runtime`, or `delivery` evidence. + +## Observed Proof Bundle + +Create or update this bundle during execution or verification when planned UI proof slots exist. JSON is the canonical machine-readable proof bundle format. Markdown proof files must include fenced JSON for deterministic validation. 
+ +```json +{ + "proof_bundle_version": 1, + "scope": { + "work_item": "phase-or-quick-or-brownfield-id", + "requirement_ids": ["REQ-01"], + "slot_ids": ["ui-01"], + "claim": "User can complete the changed flow without a broken rendered UI." + }, + "route_state": { + "route": "/example", + "state": "role, data state, feature flag, loading/error/empty state, or component story" + }, + "environment": { + "app_url": "http://localhost:3000", + "browser": "project default or manual browser", + "browser_version": "record if known", + "os": "record if relevant", + "data_state": "synthetic or seeded data" + }, + "viewport": { + "width": 1280, + "height": 720, + "device_scale_factor": "record if relevant" + }, + "evidence_inputs": { + "kinds": ["test", "runtime"], + "tools_used": ["manual"] + }, + "commands_or_manual_steps": [ + { + "command": "npm run test:e2e -- changed-flow.spec.ts", + "exit_code": 0, + "result": "passed", + "attempts": 1 + }, + { + "manual_step": "Open /example as synthetic user and complete the changed interaction.", + "result": "passed" + } + ], + "observations": [ + { + "observation": "Changed control is visible and completes the flow.", + "claim": "User can complete the changed flow without a broken rendered UI.", + "route_state": { + "route": "/example", + "state": "role, data state, feature flag, loading/error/empty state, or component story" + }, + "evidence_kind": "runtime", + "artifact_refs": ["test-results/changed-flow-report/index.html"], + "privacy": { + "data_classification": "synthetic", + "raw_artifacts_safe_to_publish": false, + "retention": "temporary_review" + }, + "result": "passed", + "claim_limit": "Does not prove Safari/WebKit behavior." + } + ], + "artifacts": [ + { + "path": "test-results/changed-flow-report/index.html", + "type": "report", + "visibility": "local_only", + "retention": "temporary_review", + "sensitivity": "possible", + "safe_to_publish": false, + "notes": "Local report only; not public proof." 
+ } + ], + "privacy": { + "data_classification": "synthetic", + "redactions": [], + "raw_artifacts_safe_to_publish": false, + "retention": "Keep metadata bundle; keep raw artifacts only while needed for review or failed proof triage." + }, + "manual_acceptance": { + "required": false, + "reviewer": null, + "result": "not_applicable" + }, + "result": { + "claim_status": "passed", + "comparison_status_by_slot": { + "ui-01": "satisfied" + } + }, + "claim_limits": [ + "Does not prove Safari/WebKit behavior.", + "Does not prove full WCAG conformance.", + "Does not prove deployed production behavior." + ] +} +``` + +Bundle rules: +- Reference raw screenshots, traces, videos, DOM snapshots, reports, accessibility scans, Gherkin, and visual diffs by path or link. Do not store raw binary or sensitive artifacts inline. +- Each observation must identify the claim, route/state, evidence kind, artifact references behind it, privacy metadata, result, and claim limit it supports. +- Every observation `artifact_refs` value must match an `artifacts[].path` or `artifacts[].url` value. +- Artifact count is never proof. Unsupported or weakly linked artifacts are `partial`, `missing`, `waived`, or `deferred`, not `satisfied`. +- Each artifact must record the locked privacy fields `visibility`, `retention`, `sensitivity`, and `safe_to_publish`. +- Raw screenshots, traces, videos, DOM snapshots, and reports default to `visibility: local_only` plus `safe_to_publish: false` unless explicitly classified as sanitized and safe to publish. +- Local-only or `safe_to_publish: false` artifacts can support local review only; they must not back tracked, public, delivery, release, or publication proof claims. +- Human acceptance may close a narrowed claim only by recording waiver, deferment, or proof debt; it must not upgrade missing or mismatched non-human proof to `satisfied`. 
+- Quick-mode UI proof should use deterministic synthetic IDs such as `quick-001` and `quick-001-ui-01` when roadmap requirement IDs do not exist. + +## Deterministic Validation + +Use `gsdd ui-proof validate ` on JSON proof-bundle metadata or markdown fenced JSON before relying on a bundle for closure; add `--claim ` only when validating that stronger proof use. Required observed-bundle top-level fields are `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`. The validator checks required bundle and observation fields, fixed evidence kinds, `result.claim_status`, observation `result`, comparison statuses, non-empty claim limits, locked artifact and observation privacy fields, observation-to-artifact references, and explicit public/tracked/delivery proof claims that rely on local-only or unsafe artifacts. `claim_status` and observation `result` use `passed`, `failed`, `partial`, `waived`, `deferred`, or `not_applicable`. The validator is metadata-only and does not inspect raw screenshot, trace, video, DOM, or report contents. + +## Comparison Statuses + +Use these statuses when comparing planned slots to observed proof: + +| Status | Meaning | Claim impact | +| --- | --- | --- | +| `satisfied` | Required observations and evidence kinds are present, scoped, and inspectable for the exact claim. | Supports the scoped UI claim. | +| `partial` | Some proof exists, but observations, artifact references, evidence kinds, privacy metadata, or assurance are weaker than planned. | Record a reduced claim or gap. | +| `missing` | Required proof is absent. | Blocks the UI claim unless explicitly waived or deferred. | +| `waived` | A human or approved plan waiver accepts the risk. | Does not prove the UI claim. | +| `deferred` | Proof moved to later work. | Current work must not claim the UI behavior is proven.
| +| `not_applicable` | Accepted rationale says no UI proof is required. | No UI proof gap for that claim. | + +Proof debt notes should name the slot, claim, route/state, missing or weak linkage, human acceptance basis, narrowed claim limit, and follow-up trigger. + +## Claim Boundary + +A UI proof bundle proves only the scoped claim, route/state, environment, viewport, observations, and evidence kinds it records. It does not imply broad visual quality, cross-browser coverage, full accessibility conformance, production delivery, release readiness, or public proof unless those dimensions are explicitly planned, evidenced, and classified safe to publish. diff --git a/distilled/workflows/audit-milestone.md b/distilled/workflows/audit-milestone.md index 717259eb..f7d2dd87 100644 --- a/distilled/workflows/audit-milestone.md +++ b/distilled/workflows/audit-milestone.md @@ -126,6 +126,7 @@ Combine: - Integration checker's report (wiring gaps, auth gaps, broken flows, requirements integration map) - Evidence observations by kind (`code`, `test`, `runtime`, `delivery`, `human`) from phase verifications, summaries, integration findings, and delivery metadata - Release claim posture observations: selected `release_claim_posture`, unsupported claims, waivers, deferrals, and contradiction checks for public, runtime, delivery, planning-drift, and generated-surface claims +- UI proof debt from phase/quick proof bundles or verification gaps, preserving the rule that waiver/deferment/human acceptance narrows claims rather than satisfying missing proof ## 5. 3-Source Cross-Reference diff --git a/distilled/workflows/execute.md b/distilled/workflows/execute.md index 04e90774..1c15da20 100644 --- a/distilled/workflows/execute.md +++ b/distilled/workflows/execute.md @@ -164,6 +164,11 @@ Before reporting a task complete: - if an API change is involved, hit the endpoint or targeted integration path - A task is not complete because code was written. 
It is complete when the intended verification path actually passes. +### UI Proof Execution +If the plan defines non-empty `ui_proof_slots`, create or update the observed UI proof bundle before claiming completion; required top-level fields are `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`. +Use existing UI tooling when available and cheap; manual/browser proof is acceptable when it records route/state, steps, observations, artifact references, and claim limits. Do not install Playwright, Cypress, Cucumber, Storybook, browser MCP, CI, or visual-regression tooling by default. Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes map onto existing evidence kinds, not new evidence kinds; reference raw artifacts by path/link instead of storing them inline. +Each artifact entry must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to `local_only` and `safe_to_publish: false` unless explicitly sanitized. Use `gsdd ui-proof validate ` when bundle metadata exists, adding `--claim <...>` only when relying on the bundle for public, tracked, delivery, release, or publication proof. Visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication decisions require human evidence or explicit waiver; artifact count, source comments, AST/cAST findings, semantic search, and Semble-like retrieval are not proof. If evidence does not match the slot claim, route/state, observation, artifact path/manual step, privacy metadata, result, and claim limit, record proof debt, waiver, deferment, or reduced claim language rather than `satisfied` proof. 
+ ### Git Guidance ```bash @@ -440,20 +445,14 @@ Execution is done when all of these are true: -Report to the user what was accomplished, then present the next step: - +Report what was accomplished, then present the next step: --- **Completed:** Plan execution — created `.planning/phases/{phase_dir}/{plan_id}-SUMMARY.md`. - **Next step:** Check `.planning/config.json` → `workflow.verifier`: - If `true`: run `/gsdd-verify` — verify that the phase goal was achieved - If `false` (or key missing): run `/gsdd-progress` — check status and route to the next phase -Also available: -- `/gsdd-plan` — plan the next wave (if more plans remain in this phase) -- `/gsdd-quick` — handle a sub-hour task outside the phase cycle -- `/gsdd-pause` — save context for later if stopping work - +Also available: `/gsdd-plan` for the next wave, `/gsdd-quick` for sub-hour work, or `/gsdd-pause` to save context. Consider clearing context before starting the next workflow for best results. --- diff --git a/distilled/workflows/plan.md b/distilled/workflows/plan.md index 890e35af..34991eb7 100644 --- a/distilled/workflows/plan.md +++ b/distilled/workflows/plan.md @@ -135,6 +135,10 @@ Also verify milestone truth is not self-contradictory across the planning surfac If any of these are missing or contradictory, STOP. Report the exact missing contract field or contradiction. Do not improvise a stronger phase contract from chat context alone. + +For UI-sensitive work, include compact `ui_proof_slots` with `slot_id`, optional `requirement_id`, `claim`, `route_state`, fixed evidence kinds (`code`, `test`, `runtime`, `delivery`, `human`), `minimum_observations`, `environment`, `viewport`, `manual_acceptance_required`, and `claim_limit`; otherwise set `no_ui_proof_rationale`. +Do not create slots for backend-only, CLI-only, docs-only, or refactor-only work unless the plan claims a visible UI outcome. 
Evidence must later match claim, route/state, observation, artifact path, evidence kind, privacy metadata, result, and claim limit; local-only or unsafe artifacts cannot support public, publication, tracked, delivery, or release proof claims. Human approval does not replace required `code`, `test`, `runtime`, or `delivery` evidence. + Plan backward from success criteria. @@ -192,6 +196,8 @@ anti_regression_targets: - Existing session middleware behavior remains unchanged for already-supported routes. known_unknowns: - Exact copy wording for auth errors may still need product confirmation. +ui_proof_slots: [] +no_ui_proof_rationale: Not UI-sensitive; scoped work does not claim a visible UI outcome. high_leverage_surfaces: [] second_pass_required: false closure_claim_limit: Do not claim phase completion until verification satisfies the evidence contract for the scoped truths. @@ -221,6 +227,7 @@ Schema rules: - `files-modified` should list the files this plan is expected to touch - `must_haves` must trace back to roadmap success criteria - `non_goals`, `hard_boundaries`, `escalation_triggers`, and `closure_claim_limit` must not be empty +- include `ui_proof_slots` for UI-sensitive work or `no_ui_proof_rationale` otherwise - `leverage.lost`, `leverage.kept`, and `leverage.gained` must all be explicit - `second_pass_required: true` if `high_leverage_surfaces` is non-empty - `parallelism_budget.max_concurrent_plans` must stay `1` unless the plan proves disjoint write ownership @@ -607,7 +614,7 @@ Planning is done when all of these are true: - [ ] Plan self-check passed - [ ] Success criteria from `ROADMAP.md` are represented as must-haves - [ ] Goal-backward derivation from criteria to artifacts to key links to tasks is explicit -- [ ] Every plan has frontmatter with `phase`, `plan`, `type`, `wave`, `depends_on`, `files-modified`, `autonomous`, `requirements`, `non_goals`, `hard_boundaries`, `escalation_triggers`, `approval_gates`, `anti_regression_targets`, 
`closure_claim_limit`, `parallelism_budget`, `leverage`, and `must_haves` +- [ ] Every plan has frontmatter with `phase`, `plan`, `type`, `wave`, `depends_on`, `files-modified`, `autonomous`, `requirements`, `non_goals`, `hard_boundaries`, `escalation_triggers`, `approval_gates`, `anti_regression_targets`, `ui_proof_slots` or `no_ui_proof_rationale`, `closure_claim_limit`, `parallelism_budget`, `leverage`, and `must_haves` - [ ] Every plan frontmatter records `runtime` and `assurance` - [ ] Every plan records checker outcome in a structured `` block - [ ] Every task has XML structure with `id`, `type`, `files`, `action`, `verify`, and `done` @@ -620,18 +627,12 @@ Planning is done when all of these are true: Report to the user what was accomplished, then present the next step: - --- **Completed:** Phase planning — created `.planning/phases/{phase_dir}/{plan_id}-PLAN.md`. **Planning stops here:** `gsdd-plan` ends after the plan artifact is written. Do not start implementation in this same run, and do not treat imperative handoff text as execution authorization. Installed generated runtime surfaces are trusted through rendering, not reviewer memory: `npx -y gsdd-cli health` compares any local generated skill/adapter surfaces against current render output, and `npx -y gsdd-cli update` regenerates them when they drift. Bare `gsdd health` / `gsdd update` are equivalent only when globally installed. - **Next workflow:** `/gsdd-execute` — start execution in a separate run when the user explicitly wants implementation to begin - -Also available: -- `/gsdd-plan` — create additional plans for the same phase (if multi-wave) -- `/gsdd-progress` — check overall project status - +Also available: `/gsdd-plan` for another wave, or `/gsdd-progress` for overall status. Consider clearing context before starting the next workflow for best results. 
--- diff --git a/distilled/workflows/quick.md b/distilled/workflows/quick.md index 5dc1d0bd..fa282624 100644 --- a/distilled/workflows/quick.md +++ b/distilled/workflows/quick.md @@ -118,6 +118,10 @@ Delegate to the planner role in quick mode. - No research phase, no ROADMAP requirements - Do NOT extract phase requirement IDs — there is no active phase - Derive must-haves directly from the task description +- If the quick task is UI-sensitive, include proportional `ui_proof_slots` with slot_id, claim, route_state, required_evidence_kinds, minimum_observations, environment, viewport, manual_acceptance_required, and claim_limit; otherwise include a short `no_ui_proof_rationale` +- UI proof slots must be matchable to exact observed evidence later: claim, route/state, observation, evidence kind, artifact path or manual step, privacy metadata, result, and claim limit. Discovery hints from source comments, AST/cAST, semantic search, or Semble-like retrieval do not satisfy proof. +- Observed artifact metadata must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports are local-only/unsafe by default. Use `gsdd ui-proof validate ` or `gsdd health` when a bundle exists; add `--claim <...>` only for public, publication, tracked, delivery, or release proof use. +- Keep UI proof proportional: do not scaffold Playwright, Cypress, Cucumber, Storybook, CI, browser MCP, or visual-regression tooling by default - Ignore Step 1 requirement extraction; use inline goal-backward planning only - Target minimal context usage @@ -263,6 +267,8 @@ Delegate to the executor role. 
- Skip the section of your role contract entirely - Do NOT update ROADMAP.md phase status or SPEC.md current state - Create summary at: `.planning/quick/$NEXT_NUM-$SLUG/$NEXT_NUM-SUMMARY.md` +- If the quick plan defines `ui_proof_slots`, create or update `.planning/quick/$NEXT_NUM-$SLUG/UI-PROOF.md` with fenced JSON containing required top-level fields: `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits` +- Human approval for visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, or privacy publication does not replace required `code`, `test`, `runtime`, or `delivery` evidence **Output:** `.planning/quick/$NEXT_NUM-$SLUG/$NEXT_NUM-SUMMARY.md` **Return:** Summary file path and completion status. diff --git a/distilled/workflows/verify.md b/distilled/workflows/verify.md index 7bb5b350..15ad5d93 100644 --- a/distilled/workflows/verify.md +++ b/distilled/workflows/verify.md @@ -128,6 +128,12 @@ Rules: Note: this step does NOT replace levels 1–3. An artifact can satisfy the evidence-kind requirement and still fail Level 2 (substantive) or Level 3 (wired). Both checks must run. + +If the plan defines non-empty `ui_proof_slots`, compare planned UI proof against observed bundles before closure. If the plan records only `no_ui_proof_rationale`, verify the rationale instead of requiring a bundle. Each observed bundle must include top-level `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`. +Classify each slot as exactly one of: `satisfied`, `partial`, `missing`, `waived`, `deferred`, or `not_applicable`. Waiver/deferment narrows the claim; it is not proof. 
Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes are artifact types or activities mapped onto existing evidence kinds, not new evidence kinds. Artifact count is never proof; each artifact must tie to the slot claim, route/state, observation, artifact path/link, privacy metadata, and claim limit. +Artifact privacy metadata must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to local-only and unsafe unless sanitized. Run `gsdd ui-proof validate ` or treat `gsdd health` E10 as blocking; add `--claim <...>` when relying on the bundle for public, tracked, delivery, release, or publication proof. Visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication require human evidence or explicit waiver; human approval does not replace required `code`, `test`, `runtime`, or `delivery` evidence. Source annotations, AST/cAST findings, semantic search, comments, and Semble-like retrieval are discovery hints only. + + Check every artifact at three levels. A common failure mode is a file that exists but is still a stub. ### Level 1: Exists @@ -423,13 +429,9 @@ Verification is done when all of these are true: Report the verification result to the user, then present the next step: --- - **Completed:** Phase verification — created `.planning/phases/{phase_dir}/{phase_num}-VERIFICATION.md`. - If status is `passed`: **Next step:** `/gsdd-progress` — route to the next phase or milestone audit. If status is `gaps_found`: **Next step:** `/gsdd-plan` — re-plan to close the identified gaps. If status is `human_needed`: **Next step:** `/gsdd-verify-work`, then rerun `/gsdd-verify` with UAT results. - Consider clearing context before starting the next workflow for best results. 
- diff --git a/tests/gsdd.guards.test.cjs b/tests/gsdd.guards.test.cjs index e9dca1ff..ecee047f 100644 --- a/tests/gsdd.guards.test.cjs +++ b/tests/gsdd.guards.test.cjs @@ -3463,6 +3463,86 @@ describe('G53 - Deliberate Subagent Contract', () => { }); }); +describe('G55 - UI Proof Contract', () => { + const template = fs.readFileSync(path.join(ROOT, 'distilled', 'templates', 'ui-proof.md'), 'utf-8'); + const planContent = fs.readFileSync(path.join(ROOT, 'distilled', 'workflows', 'plan.md'), 'utf-8'); + const executeContent = fs.readFileSync(path.join(ROOT, 'distilled', 'workflows', 'execute.md'), 'utf-8'); + const quickContent = fs.readFileSync(path.join(ROOT, 'distilled', 'workflows', 'quick.md'), 'utf-8'); + const verifyContent = fs.readFileSync(path.join(ROOT, 'distilled', 'workflows', 'verify.md'), 'utf-8'); + const plannerRole = fs.readFileSync(path.join(ROOT, 'agents', 'planner.md'), 'utf-8'); + const executorRole = fs.readFileSync(path.join(ROOT, 'agents', 'executor.md'), 'utf-8'); + const verifierRole = fs.readFileSync(path.join(ROOT, 'agents', 'verifier.md'), 'utf-8'); + const planChecker = fs.readFileSync(path.join(ROOT, 'distilled', 'templates', 'delegates', 'plan-checker.md'), 'utf-8'); + + test('template preserves planned slot and observed bundle fields', () => { + for (const token of [ + 'ui_proof_slots', + 'no_ui_proof_rationale', + 'claim', + 'route_state', + 'required_evidence_kinds', + 'minimum_observations', + 'environment', + 'viewport', + 'manual_acceptance_required', + 'claim_limit', + 'requirement_ids', + 'slot_ids', + 'evidence_inputs', + 'commands_or_manual_steps', + 'observations', + 'artifacts', + 'privacy', + 'result', + 'claim_status', + 'claim_limits', + ]) { + assert.match(template, new RegExp(token), `ui-proof.md must include ${token}. FIX: Restore the locked UI proof schema field.`); + } + assert.match(template, /```json/, 'ui-proof.md must use fenced JSON for observed proof bundle validation. 
FIX: Keep JSON canonical.'); + }); + + test('template defines comparison statuses and unchanged evidence kinds', () => { + assert.match(template, /code`, `test`, `runtime`, `delivery`, and `human`/, 'ui-proof.md must preserve the five stable evidence kinds.'); + for (const status of ['satisfied', 'partial', 'missing', 'waived', 'deferred', 'not_applicable']) { + assert.match(template, new RegExp('`' + status + '`'), `ui-proof.md must define comparison status ${status}.`); + } + assert.match(template, /not new evidence kinds/i, 'UI artifacts must not become new evidence kinds.'); + }); + + test('workflow and role sources preserve UI proof planning execution and verification contracts', () => { + assert.match(planContent, //, 'plan.md must include UI proof planning contract.'); + assert.match(planContent, /ui_proof_slots[\s\S]*no_ui_proof_rationale/, 'plan.md must require slots or no-UI-proof rationale.'); + assert.match(plannerRole, //, 'planner role must include UI proof planning guidance.'); + assert.match(executeContent, /UI Proof Execution/, 'execute.md must include UI proof execution guidance.'); + assert.match(executorRole, /UI Proof Execution/, 'executor role must include UI proof execution guidance.'); + assert.match(quickContent, /ui_proof_slots/, 'quick.md must preserve proportional UI proof slots.'); + assert.match(verifyContent, //, 'verify.md must include planned-vs-observed UI proof comparison.'); + assert.match(verifierRole, /For UI proof slots, fail closed/i, 'verifier role must fail closed on weak UI proof.'); + }); + + test('guardrails reject agent-only proof, artifact theater, and unsafe public claims', () => { + const combined = [template, planContent, executeContent, quickContent, verifyContent, plannerRole, executorRole, verifierRole, planChecker].join('\n'); + for (const phrase of [ + /visual taste/i, + /accessibility judgment/i, + /baseline acceptance/i, + /subjective polish\/layout quality/i, + /privacy publication/i, + /does not replace 
required `code`, `test`, `runtime`, or `delivery` evidence/i, + /agent-only `looks good` closure/i, + /artifact-count proof|Artifact count is never proof/i, + /visibility[\s\S]*retention[\s\S]*sensitivity[\s\S]*safe_to_publish/, + /observation privacy fields|observation privacy metadata|privacy metadata/i, + /`passed`, `failed`, `partial`, `waived`, `deferred`, or `not_applicable`/, + /local-only or `safe_to_publish: false` artifacts|local-only or unsafe artifacts/i, + /Do not install Playwright, Cypress, Cucumber, Storybook, browser MCP, CI, or visual-regression tooling by default|do not scaffold Playwright, Cypress, Cucumber, Storybook, CI, browser MCP, or visual-regression tooling by default/i, + ]) { + assert.match(combined, phrase, `UI proof contract must preserve guardrail ${phrase}.`); + } + }); +}); + describe('G42 - Public Proof Export', () => { test('public proof and support entrypoints are git-tracked before repo truth advertises them', () => { const requiredTrackedPaths = [ diff --git a/tests/gsdd.health.test.cjs b/tests/gsdd.health.test.cjs index 3fade97e..8ff5d8ad 100644 --- a/tests/gsdd.health.test.cjs +++ b/tests/gsdd.health.test.cjs @@ -47,6 +47,7 @@ function writeAlignedTruthFixtures() { | E7 | ERROR | x | | E8 | ERROR | x | | E9 | ERROR | x | +| E10 | ERROR | x | | W1 | WARN | x | | W2 | WARN | x | | W3 | WARN | x | @@ -146,6 +147,50 @@ function writeForkHonestAlignmentFixtures() { ].join('\n')); } +function validUiProofBundle(overrides = {}) { + return { + proof_bundle_version: 1, + scope: { + work_item: 'quick-001-example-ui', + requirement_ids: ['quick-001'], + slot_ids: ['quick-001-ui-01'], + claim: 'Local reviewer can inspect changed UI proof metadata.', + }, + route_state: { route: '/example', state: 'synthetic user' }, + environment: { app_url: 'http://localhost:3000', data_state: 'synthetic' }, + viewport: { width: 1280, height: 720 }, + evidence_inputs: { kinds: ['test', 'runtime'], tools_used: ['manual'] }, + commands_or_manual_steps: 
[{ manual_step: 'Open /example.', result: 'passed' }], + observations: [{ + observation: 'Changed state is visible.', + claim: 'Local reviewer can inspect the changed UI proof metadata.', + route_state: { route: '/example', state: 'synthetic user' }, + evidence_kind: 'runtime', + artifact_refs: ['artifacts/report.html'], + privacy: { data_classification: 'synthetic', raw_artifacts_safe_to_publish: false, retention: 'temporary_review' }, + result: 'passed', + claim_limit: 'Does not prove unrelated UI states.', + }], + artifacts: [{ + path: 'artifacts/report.html', + type: 'report', + visibility: 'local_only', + retention: 'temporary_review', + sensitivity: 'synthetic', + safe_to_publish: false, + }], + privacy: { + data_classification: 'synthetic', + redactions: [], + raw_artifacts_safe_to_publish: false, + retention: 'Keep raw artifacts only while needed for review.', + }, + result: { claim_status: 'passed', comparison_status_by_slot: { 'quick-001-ui-01': 'satisfied' } }, + claim_limits: ['Does not prove unrelated UI states.'], + ...overrides, + }; +} + describe('Health — pre-init guard', () => { test('no .planning/ → pre-init error with exit code 1', async () => { const result = await runCliAsMain(tmpDir, ['health']); @@ -291,6 +336,52 @@ describe('Health — ERROR: missing research/codebase/root templates', () => { const json = JSON.parse(result.output); assert.ok(json.errors.some((e) => e.id === 'E8' && e.message.includes('spec.md'))); }); + + test('ui-proof root template removed → E8', async () => { + await initWorkspace(); + fs.rmSync(path.join(tmpDir, '.planning', 'templates', 'ui-proof.md'), { force: true }); + const result = await runCliAsMain(tmpDir, ['health', '--json']); + const json = JSON.parse(result.output); + assert.ok(json.errors.some((e) => e.id === 'E8' && e.message.includes('ui-proof.md'))); + }); +}); + +describe('Health — ERROR: invalid UI proof bundle metadata', () => { + test('invalid known UI proof bundle → E10 without mutating files', async 
() => { + await initWorkspace(); + const bundlePath = path.join(tmpDir, '.planning', 'ui-proof.json'); + const invalidBundle = validUiProofBundle({ proof_claim: 'public' }); + fs.writeFileSync(bundlePath, JSON.stringify(invalidBundle, null, 2)); + const before = fs.readFileSync(bundlePath, 'utf-8'); + + const result = await runCliAsMain(tmpDir, ['health', '--json']); + const json = JSON.parse(result.output); + + assert.strictEqual(json.status, 'broken'); + assert.ok(json.errors.some((e) => e.id === 'E10' && e.message.includes('unsafe_public_proof_claim'))); + assert.strictEqual(fs.readFileSync(bundlePath, 'utf-8'), before, 'health must not mutate UI proof bundles'); + }); + + test('invalid nested brownfield UI proof bundle → E10', async () => { + await initWorkspace(); + writeFile('.planning/brownfield-change/change-001/UI-PROOF.md', '```json\n{"proof_bundle_version":1}\n```\n'); + + const result = await runCliAsMain(tmpDir, ['health', '--json']); + const json = JSON.parse(result.output); + + assert.strictEqual(result.exitCode, 1); + assert.ok(json.errors.some((e) => e.id === 'E10' && e.message.includes('brownfield-change/change-001/UI-PROOF.md'))); + }); + + test('valid local-only known UI proof bundle → no E10', async () => { + await initWorkspace(); + fs.writeFileSync(path.join(tmpDir, '.planning', 'ui-proof.json'), JSON.stringify(validUiProofBundle(), null, 2)); + + const result = await runCliAsMain(tmpDir, ['health', '--json']); + const json = JSON.parse(result.output); + + assert.ok(!json.errors.some((e) => e.id === 'E10')); + }); }); describe('Health — WARN: missing manifest', () => { diff --git a/tests/gsdd.init.test.cjs b/tests/gsdd.init.test.cjs index c7f0be8b..e6151796 100644 --- a/tests/gsdd.init.test.cjs +++ b/tests/gsdd.init.test.cjs @@ -130,6 +130,8 @@ describe('gsdd init and update', () => { assert.ok(fs.existsSync(path.join(tmpDir, '.planning', 'templates', 'delegates', 'plan-checker.md'))); assert.ok(fs.existsSync(path.join(tmpDir, '.planning', 
'templates', 'auth-matrix.md')), 'auth-matrix.md template must be distributed during init'); + assert.ok(fs.existsSync(path.join(tmpDir, '.planning', 'templates', 'ui-proof.md')), + 'ui-proof.md template must be distributed during init'); for (const file of ['CHANGE.md', 'HANDOFF.md', 'VERIFICATION.md']) { assert.ok(fs.existsSync(path.join(tmpDir, '.planning', 'templates', 'brownfield-change', file)), `brownfield-change/${file} template must be distributed during init`); @@ -154,6 +156,8 @@ describe('gsdd init and update', () => { const launcher = fs.readFileSync(path.join(tmpDir, '.planning', 'bin', 'gsdd.mjs'), 'utf-8'); assert.match(launcher, /bootstrapHelperWorkspace\(import\.meta\.url\)/); assert.match(launcher, /import \{ cmdFileOp \} from '\.\/lib\/file-ops\.mjs';/); + assert.match(launcher, /import \{ cmdUiProof \} from '\.\/lib\/ui-proof\.mjs';/); + assert.match(launcher, /'ui-proof': cmdUiProof/); assert.doesNotMatch(launcher, /npm(?:\.cmd)?'.*exec.*--package=/s); assert.doesNotMatch(launcher, new RegExp(tmpDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); assert.doesNotMatch(launcher, /Repos[\\/].+get-shit-done-distilled/i); diff --git a/tests/phase.test.cjs b/tests/phase.test.cjs index 7080e8e6..e24d0869 100644 --- a/tests/phase.test.cjs +++ b/tests/phase.test.cjs @@ -34,6 +34,10 @@ async function importRuntimeFreshnessModule() { return import(`${pathToFileURL(path.join(__dirname, '..', 'bin', 'lib', 'runtime-freshness.mjs')).href}?t=${Date.now()}-${Math.random()}`); } +async function importUiProofModule() { + return import(`${pathToFileURL(path.join(__dirname, '..', 'bin', 'lib', 'ui-proof.mjs')).href}?t=${Date.now()}-${Math.random()}`); +} + async function importSessionFingerprintModule() { return import(`${pathToFileURL(path.join(__dirname, '..', 'bin', 'lib', 'session-fingerprint.mjs')).href}?t=${Date.now()}-${Math.random()}`); } @@ -1373,6 +1377,47 @@ describe('Phase 30 lifecycle-preflight helper', () => { assert.ok(output.blockers.some((blocker) => 
blocker.code === 'roadmap_phase_status_mismatch')); }); + test('allows audit-milestone preflight when active milestone uses level-two heading', async () => { + fs.writeFileSync( + path.join(tmpDir, '.planning', 'ROADMAP.md'), + [ + '# Roadmap', + '', + '## Milestones', + '', + '- 🚧 **v1.7 Agentic Engineering Hardening** — Phases 50-54 (in progress)', + '', + '## Phases', + '', + '## v1.7 Agentic Engineering Hardening', + '', + '- [x] **Phase 30: Deterministic Lifecycle Gates** — [ENGINE-02]', + '', + ].join('\n') + ); + fs.writeFileSync(path.join(tmpDir, '.planning', 'SPEC.md'), '- [x] **[ENGINE-02]**: lifecycle gates\n'); + fs.writeFileSync( + path.join(tmpDir, '.planning', 'phases', '30-deterministic-lifecycle-gates', '30-PLAN.md'), + '# plan\n' + ); + fs.writeFileSync( + path.join(tmpDir, '.planning', 'phases', '30-deterministic-lifecycle-gates', '30-SUMMARY.md'), + '# summary\n' + ); + fs.writeFileSync( + path.join(tmpDir, '.planning', 'phases', '30-deterministic-lifecycle-gates', '30-VERIFICATION.md'), + '# verification\n' + ); + + const result = await runCliAsMain(tmpDir, ['lifecycle-preflight', 'audit-milestone']); + assert.strictEqual(result.exitCode, 0, result.output); + + const output = JSON.parse(result.output); + assert.strictEqual(output.allowed, true); + assert.strictEqual(output.lifecycle.currentMilestone.version, 'v1.7'); + assert.strictEqual(output.lifecycle.currentMilestone.title, 'Agentic Engineering Hardening'); + }); + test('blocks complete-milestone preflight when roadmap overview/detail status mismatches', async () => { fs.writeFileSync( path.join(tmpDir, '.planning', 'ROADMAP.md'), @@ -2300,6 +2345,269 @@ describe('Phase 31 evidence-gated closure helpers', () => { }); }); +describe('Phase 57 UI proof validation helper', () => { + let tmpDir; + + beforeEach(() => { + tmpDir = createGsddTempProject(); + fs.mkdirSync(path.join(tmpDir, '.planning'), { recursive: true }); + }); + + afterEach(() => { + cleanup(tmpDir); + }); + + function 
validBundle(overrides = {}) { + return { + proof_bundle_version: 1, + scope: { + work_item: 'quick-001-example-ui', + requirement_ids: ['quick-001'], + slot_ids: ['quick-001-ui-01'], + claim: 'Local reviewer can inspect the changed UI proof metadata.', + }, + route_state: { route: '/example', state: 'synthetic user' }, + environment: { app_url: 'http://localhost:3000', data_state: 'synthetic' }, + viewport: { width: 1280, height: 720 }, + evidence_inputs: { kinds: ['test', 'runtime'], tools_used: ['manual'] }, + commands_or_manual_steps: [{ manual_step: 'Open /example and inspect the changed state.', result: 'passed' }], + observations: [{ + observation: 'Changed state is visible.', + claim: 'Local reviewer can inspect the changed UI proof metadata.', + route_state: { route: '/example', state: 'synthetic user' }, + evidence_kind: 'runtime', + artifact_refs: ['artifacts/report.html'], + privacy: { data_classification: 'synthetic', raw_artifacts_safe_to_publish: false, retention: 'temporary_review' }, + result: 'passed', + claim_limit: 'Does not prove unrelated UI states.', + }], + artifacts: [{ + path: 'artifacts/report.html', + type: 'report', + visibility: 'local_only', + retention: 'temporary_review', + sensitivity: 'synthetic', + safe_to_publish: false, + }], + privacy: { + data_classification: 'synthetic', + redactions: [], + raw_artifacts_safe_to_publish: false, + retention: 'Keep raw artifacts only while needed for review.', + }, + result: { claim_status: 'passed', comparison_status_by_slot: { 'quick-001-ui-01': 'satisfied' } }, + claim_limits: ['Does not prove unrelated UI states.'], + ...overrides, + }; + } + + test('valid local-only proof metadata passes without browser tooling or dependencies', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle()); + assert.strictEqual(result.valid, true, JSON.stringify(result.errors)); + }); + + test('fenced JSON in markdown parses but YAML-only bundles fail', 
async () => { + const mod = await importUiProofModule(); + const bundle = validBundle(); + const fenced = mod.parseUiProofBundleContent(`# UI proof\n\n\`\`\`json\n${JSON.stringify(bundle)}\n\`\`\`\n`, 'UI-PROOF.md'); + assert.deepStrictEqual(fenced.errors, []); + assert.strictEqual(fenced.bundle.scope.work_item, 'quick-001-example-ui'); + + const yamlOnly = mod.parseUiProofBundleContent('proof_bundle_version: 1\nscope:\n claim: nope\n', 'UI-PROOF.md'); + assert.strictEqual(yamlOnly.bundle, null); + assert.ok(yamlOnly.errors.some((error) => error.code === 'unparseable_bundle')); + }); + + test('missing fields invalid statuses unsupported evidence kinds and missing claim limits fail', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle({ + evidence_inputs: { kinds: ['screenshot'] }, + result: { comparison_status_by_slot: { 'quick-001-ui-01': 'looks_good' } }, + claim_limits: [], + }); + delete bundle.scope.work_item; + delete bundle.artifacts[0].safe_to_publish; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.path === 'scope.work_item')); + assert.ok(result.errors.some((error) => error.code === 'unsupported_evidence_kind')); + assert.ok(result.errors.some((error) => error.code === 'invalid_comparison_status')); + assert.ok(result.errors.some((error) => error.code === 'missing_claim_limits')); + assert.ok(result.errors.some((error) => error.path === 'artifacts[0].safe_to_publish')); + }); + + test('empty required arrays and mismatched comparison slots fail', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle(); + bundle.scope.requirement_ids = []; + bundle.commands_or_manual_steps = []; + bundle.observations = []; + bundle.result.comparison_status_by_slot = { 'quick-001-ui-99': 'satisfied' }; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + 
assert.ok(result.errors.some((error) => error.path === 'scope.requirement_ids')); + assert.ok(result.errors.some((error) => error.path === 'commands_or_manual_steps')); + assert.ok(result.errors.some((error) => error.path === 'observations')); + assert.ok(result.errors.some((error) => error.code === 'missing_comparison_status')); + assert.ok(result.errors.some((error) => error.code === 'unknown_comparison_slot')); + }); + + test('observation artifact references must resolve to declared artifacts', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle(); + bundle.observations[0].artifact_refs = ['missing/report.html']; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unknown_artifact_ref')); + }); + + test('observations must include scoped support metadata', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle(); + delete bundle.observations[0].claim; + delete bundle.observations[0].artifact_refs; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.path === 'observations[0].claim')); + assert.ok(result.errors.some((error) => error.path === 'observations[0].artifact_refs')); + }); + + test('observation privacy and result status are schema-checked', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle(); + bundle.observations[0].privacy = { + data_classification: 'synthetic', + raw_artifacts_safe_to_publish: 'no', + }; + bundle.observations[0].result = 'looks_good'; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.path === 'observations[0].privacy.retention')); + assert.ok(result.errors.some((error) => error.code === 'invalid_raw_artifacts_safe_to_publish')); + 
assert.ok(result.errors.some((error) => error.code === 'invalid_observation_result')); + }); + + test('result claim status is required and enum-validated', async () => { + const mod = await importUiProofModule(); + const missingStatus = validBundle({ result: { comparison_status_by_slot: { 'quick-001-ui-01': 'satisfied' } } }); + const invalidStatus = validBundle({ result: { claim_status: 'looks_good', comparison_status_by_slot: { 'quick-001-ui-01': 'satisfied' } } }); + + const missingResult = mod.validateUiProofBundle(missingStatus); + assert.strictEqual(missingResult.valid, false); + assert.ok(missingResult.errors.some((error) => error.code === 'missing_claim_status')); + + const invalidResult = mod.validateUiProofBundle(invalidStatus); + assert.strictEqual(invalidResult.valid, false); + assert.ok(invalidResult.errors.some((error) => error.code === 'invalid_claim_status')); + }); + + test('public tracked and delivery claims cannot rely on local-only unsafe raw artifacts', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ proof_claim: 'public' })); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsafe_public_proof_claim')); + }); + + test('delivery evidence kind does not imply a delivery proof claim', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ + evidence_inputs: { kinds: ['test', 'runtime', 'delivery'], tools_used: ['manual'] }, + })); + + assert.strictEqual(result.valid, true, JSON.stringify(result.errors)); + assert.ok(!result.errors.some((error) => error.code === 'unsafe_public_proof_claim')); + }); + + test('negative claim limits do not imply public claim enforcement', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ + claim_limits: [ + 'Does not prove public release, production delivery, tracked publication, 
or external support.', + ], + })); + + assert.strictEqual(result.valid, true, JSON.stringify(result.errors)); + assert.ok(!result.errors.some((error) => error.code === 'unsafe_public_proof_claim')); + }); + + test('explicit claim context still enforces public claim artifact safety', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ + claim_context: { proof_use: 'release' }, + })); + + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsafe_public_proof_claim')); + }); + + test('plural proof claims still enforce public claim artifact safety', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ proof_claims: ['tracked'] })); + + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsafe_public_proof_claim')); + }); + + test('persisted proof claims reject unsupported claim uses', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ proof_claim: 'published' })); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsupported_claim_use')); + }); + + test('explicitly safe-to-publish proof metadata can support public claims', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle({ proof_claim: 'public' }); + bundle.artifacts[0] = { + ...bundle.artifacts[0], + visibility: 'public', + sensitivity: 'sanitized', + safe_to_publish: true, + }; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, true, JSON.stringify(result.errors)); + }); + + test('ui-proof validate command validates bundle files directly', async () => { + await runCliAsMain(tmpDir, ['init', '--auto', '--tools', 'agents']); + const bundlePath = path.join(tmpDir, '.planning', 'ui-proof.json'); + 
fs.writeFileSync(bundlePath, JSON.stringify(validBundle(), null, 2)); + + const result = await runCliAsMain(tmpDir, ['ui-proof', 'validate', '.planning/ui-proof.json']); + assert.strictEqual(result.exitCode, 0, result.output); + const parsed = JSON.parse(result.output); + assert.strictEqual(parsed.valid, true); + }); + + test('ui-proof validate rejects unsupported claim flags', async () => { + await runCliAsMain(tmpDir, ['init', '--auto', '--tools', 'agents']); + const bundlePath = path.join(tmpDir, '.planning', 'ui-proof.json'); + fs.writeFileSync(bundlePath, JSON.stringify(validBundle(), null, 2)); + + const result = await runCliAsMain(tmpDir, ['ui-proof', 'validate', '.planning/ui-proof.json', '--claim', 'published']); + assert.strictEqual(result.exitCode, 1); + assert.match(result.output, /Unsupported UI proof claim use: published/); + }); + + test('ui-proof validate claim flag still enforces public claim artifact safety', async () => { + await runCliAsMain(tmpDir, ['init', '--auto', '--tools', 'agents']); + const bundlePath = path.join(tmpDir, '.planning', 'ui-proof.json'); + fs.writeFileSync(bundlePath, JSON.stringify(validBundle(), null, 2)); + + const result = await runCliAsMain(tmpDir, ['ui-proof', 'validate', '.planning/ui-proof.json', '--claim', 'release']); + assert.strictEqual(result.exitCode, 1); + const parsed = JSON.parse(result.output); + assert.ok(parsed.errors.some((error) => error.code === 'unsafe_public_proof_claim')); + }); +}); + describe('Phase 32 runtime-freshness helper', () => { let tmpDir; From 7b8b9bb8f0271e04a527bdfd9f2d2888472ff6c9 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Wed, 29 Apr 2026 18:27:23 +0200 Subject: [PATCH 2/2] fix: harden UI proof validation --- bin/lib/ui-proof.mjs | 66 +++++++++++++++++++++++-- distilled/DESIGN.md | 2 +- distilled/templates/ui-proof.md | 2 +- tests/phase.test.cjs | 87 +++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 7 deletions(-) diff --git a/bin/lib/ui-proof.mjs 
b/bin/lib/ui-proof.mjs index 9d6e2c78..ba606889 100644 --- a/bin/lib/ui-proof.mjs +++ b/bin/lib/ui-proof.mjs @@ -72,7 +72,6 @@ function normalizeArray(value) { function artifactType(artifact) { const explicit = typeof artifact.type === 'string' ? artifact.type.toLowerCase() : ''; - if (explicit) return explicit; const artifactPath = typeof artifact.path === 'string' ? artifact.path.toLowerCase() : ''; if (/screenshot|\.png$|\.jpe?g$|\.webp$/.test(artifactPath)) return 'screenshot'; if (/trace|\.zip$/.test(artifactPath)) return 'trace'; @@ -125,10 +124,31 @@ function validateObservationPrivacy(privacy, path, errors) { } } +function validateCommandsOrManualSteps(bundle, errors) { + for (const [index, step] of normalizeArray(bundle?.commands_or_manual_steps).entries()) { + const stepPath = `commands_or_manual_steps[${index}]`; + if (!isPlainObject(step)) { + addError(errors, 'invalid_proof_step', stepPath, 'UI proof command/manual step entry must be an object.', 'Record a command or manual_step plus its result.'); + continue; + } + if (!hasValue(step.command) && !hasValue(step.manual_step)) { + addError(errors, 'missing_proof_step_action', stepPath, 'UI proof command/manual step must include command or manual_step.', 'Record the exact command or manual step used to generate the observation.'); + } + if (!hasValue(step.result)) { + addError(errors, 'missing_proof_step_result', `${stepPath}.result`, 'UI proof command/manual step must include result.', `Record result using: ${CLAIM_STATUSES.join(', ')}.`); + } else if (!CLAIM_STATUSES.includes(step.result)) { + addError(errors, 'invalid_proof_step_result', `${stepPath}.result`, `Invalid UI proof command/manual step result: ${step.result}`, `Use only: ${CLAIM_STATUSES.join(', ')}.`); + } + } +} + function validateObservations(bundle, errors) { for (const [index, observation] of normalizeArray(bundle?.observations).entries()) { - if (!isPlainObject(observation)) continue; const observationPath = `observations[${index}]`; + 
if (!isPlainObject(observation)) { + addError(errors, 'invalid_observation', observationPath, 'UI proof observation entry must be an object.', 'Record observation metadata with claim, route_state, evidence_kind, artifact_refs, privacy, result, and claim_limit.'); + continue; + } for (const field of REQUIRED_OBSERVATION_FIELDS) requireField(observation, field, observationPath, errors); if (hasValue(observation.evidence_kind) && !EVIDENCE_KINDS.includes(observation.evidence_kind)) { addError(errors, 'unsupported_evidence_kind', `${observationPath}.evidence_kind`, `Unsupported UI proof observation evidence kind: ${observation.evidence_kind}`, `Use only: ${EVIDENCE_KINDS.join(', ')}.`); @@ -198,6 +218,22 @@ function artifactReference(artifact) { return null; } +function validateArtifactReferenceSafety(ref, path, errors) { + if (/^[a-z][a-z0-9+.-]*:/i.test(ref)) { + if (!/^https?:\/\//i.test(ref)) { + addError(errors, 'invalid_artifact_ref_location', path, `UI proof artifact reference uses an unsupported URL scheme: ${ref}`, 'Use a workspace-relative path or an http(s) URL; do not reference local file URLs.'); + } + return; + } + if (ref.startsWith('//') || isAbsolute(ref) || ref.split(/[\\/]+/).includes('..')) { + addError(errors, 'invalid_artifact_ref_location', path, `UI proof artifact reference must stay workspace-relative: ${ref}`, 'Use a relative path under the workspace, or an http(s) URL for external sanitized evidence.'); + } +} + +function isSanitizedSensitivity(value) { + return typeof value === 'string' && /(^|[_\s-])(sanitized|public_safe|public-safe)($|[_\s-])/.test(value.toLowerCase()); +} + function validateArtifacts(bundle, errors, publicClaim) { const artifacts = normalizeArray(bundle?.artifacts); if (artifacts.length === 0) { @@ -216,6 +252,7 @@ function validateArtifacts(bundle, errors, publicClaim) { if (!ref) { addError(errors, 'missing_artifact_ref', artifactPath, 'UI proof artifact must include path or url.', 'Reference raw UI artifacts by path 
or URL; do not inline them.'); } else { + validateArtifactReferenceSafety(ref, artifactPath, errors); artifactRefs.add(ref); } for (const field of REQUIRED_ARTIFACT_FIELDS) { @@ -233,12 +270,28 @@ function validateArtifacts(bundle, errors, publicClaim) { if (publicClaim && (artifact.visibility === 'local_only' || artifact.safe_to_publish !== true)) { addError(errors, 'unsafe_public_proof_claim', artifactPath, 'Public/tracked/delivery UI proof claims cannot rely on local-only or unsafe artifacts.', 'Use local-only claim language, or provide sanitized artifacts with safe_to_publish: true and non-local visibility.'); } + if (publicClaim && isRawUiArtifact(artifact) && !isSanitizedSensitivity(artifact.sensitivity)) { + addError(errors, 'unsafe_public_artifact_sensitivity', `${artifactPath}.sensitivity`, 'Public/tracked/delivery raw UI proof artifacts must be classified sanitized.', 'Set sensitivity to a sanitized/public-safe classification after explicit review, or narrow the proof claim.'); + } } return artifactRefs; } -function validatePrivacy(bundle, errors) { +function validatePrivacy(bundle, errors, publicClaim) { validateObservationPrivacy(bundle.privacy, 'privacy', errors); + if (publicClaim && bundle.privacy?.raw_artifacts_safe_to_publish !== true) { + addError(errors, 'unsafe_public_proof_privacy', 'privacy.raw_artifacts_safe_to_publish', 'Public/tracked/delivery UI proof claims require bundle privacy metadata to classify raw artifacts safe to publish.', 'Use local-only claim language, or set raw_artifacts_safe_to_publish: true after sanitized/public-safe review.'); + } +} + +function validatePublicObservationPrivacy(bundle, errors, publicClaim) { + if (!publicClaim) return; + for (const [index, observation] of normalizeArray(bundle?.observations).entries()) { + if (!isPlainObject(observation)) continue; + if (observation.privacy?.raw_artifacts_safe_to_publish !== true) { + addError(errors, 'unsafe_public_observation_privacy', 
`observations[${index}].privacy.raw_artifacts_safe_to_publish`, 'Public/tracked/delivery UI proof claims require observation privacy metadata to classify raw artifacts safe to publish.', 'Use local-only claim language, or set raw_artifacts_safe_to_publish: true after sanitized/public-safe review.'); + } + } } function validateObservationArtifactRefs(bundle, artifactRefs, errors) { @@ -263,14 +316,17 @@ export function validateUiProofBundle(bundle, options = {}) { for (const field of REQUIRED_BUNDLE_FIELDS) requireField(bundle, field, '', errors); for (const field of REQUIRED_SCOPE_FIELDS) requireField(bundle.scope, field, 'scope', errors); + const publicClaim = hasPublicClaim(bundle, options); validateClaimUses(bundle, options, errors); validateEvidenceKinds(bundle, errors); + validateCommandsOrManualSteps(bundle, errors); validateObservations(bundle, errors); validateResult(bundle, errors); validateComparisonStatuses(bundle, errors); validateClaimLimits(bundle, errors); - validatePrivacy(bundle, errors); - const artifactRefs = validateArtifacts(bundle, errors, hasPublicClaim(bundle, options)); + validatePrivacy(bundle, errors, publicClaim); + validatePublicObservationPrivacy(bundle, errors, publicClaim); + const artifactRefs = validateArtifacts(bundle, errors, publicClaim); validateObservationArtifactRefs(bundle, artifactRefs, errors); return { valid: errors.length === 0, errors, warnings }; diff --git a/distilled/DESIGN.md b/distilled/DESIGN.md index 744810f1..18b03a78 100644 --- a/distilled/DESIGN.md +++ b/distilled/DESIGN.md @@ -2834,7 +2834,7 @@ Posture compatibility is part of that closeout contract: `repo_closeout` and `ru - Screenshots, traces, videos, reports, accessibility scans, Gherkin, and visual diffs are artifact types or activities mapped onto the five existing evidence kinds, not new evidence kinds. 
- Source annotations, AST/cAST findings, semantic search hits, comments, and Semble-like retrieval may discover proof obligations, but they are discovery hints only and do not satisfy proof slots. - Visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication require human evidence or explicit waiver, and human approval does not replace required `code`, `test`, `runtime`, or `delivery` evidence. -- Deterministic metadata enforcement keeps the evidence and comparison-status vocabularies unchanged: artifact entries require `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to `local_only` plus `safe_to_publish: false`; `bin/lib/ui-proof.mjs` validates required bundle/observation fields, fixed evidence kinds, claim/result statuses, comparison statuses, claim limits, privacy metadata, and public/tracked/delivery proof claims backed by local-only or unsafe artifacts. +- Deterministic metadata enforcement keeps the evidence and comparison-status vocabularies unchanged: artifact entries require `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to `local_only` plus `safe_to_publish: false`; `bin/lib/ui-proof.mjs` validates required bundle/observation fields, structured command/manual-step entries, fixed evidence kinds, claim/result statuses, comparison statuses, claim limits, privacy metadata, safe artifact references, and public/tracked/delivery proof claims backed by local-only, unsafe, unsanitized, or privacy-contradictory artifacts. - `gsdd health` reports invalid known UI proof bundles as E10 using the same validator, staying read-only and metadata-only. 
**Leverage:** diff --git a/distilled/templates/ui-proof.md b/distilled/templates/ui-proof.md index d7d7ef55..052a2f29 100644 --- a/distilled/templates/ui-proof.md +++ b/distilled/templates/ui-proof.md @@ -152,7 +152,7 @@ Bundle rules: ## Deterministic Validation -Use `gsdd ui-proof validate ` on JSON proof-bundle metadata or markdown fenced JSON before relying on a bundle for closure; add `--claim ` only when validating that stronger proof use. Required observed-bundle top-level fields are `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`. The validator checks required bundle and observation fields, fixed evidence kinds, `result.claim_status`, observation `result`, comparison statuses, non-empty claim limits, locked artifact and observation privacy fields, observation-to-artifact references, and explicit public/tracked/delivery proof claims that rely on local-only or unsafe artifacts. `claim_status` and observation `result` use `passed`, `failed`, `partial`, `waived`, `deferred`, or `not_applicable`. It is metadata-only and does not inspect raw screenshot, trace, video, DOM, or report contents. +Use `gsdd ui-proof validate ` on JSON proof-bundle metadata or markdown fenced JSON before relying on a bundle for closure; add `--claim ` only when validating that stronger proof use. Required observed-bundle top-level fields are `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`. 
The validator checks required bundle and observation fields, structured command/manual-step entries, fixed evidence kinds, `result.claim_status`, observation `result`, comparison statuses, non-empty claim limits, locked artifact and observation privacy fields, observation-to-artifact references, workspace-relative/http(s) artifact references, and explicit public/tracked/delivery proof claims that rely on local-only, unsafe, unsanitized, or privacy-contradictory artifacts. `claim_status`, observation `result`, and command/manual-step `result` use `passed`, `failed`, `partial`, `waived`, `deferred`, or `not_applicable`. It is metadata-only and does not inspect raw screenshot, trace, video, DOM, or report contents. ## Comparison Statuses diff --git a/tests/phase.test.cjs b/tests/phase.test.cjs index be1889cb..4f3e926d 100644 --- a/tests/phase.test.cjs +++ b/tests/phase.test.cjs @@ -2485,6 +2485,19 @@ describe('Phase 57 UI proof validation helper', () => { assert.ok(result.errors.some((error) => error.code === 'unknown_comparison_slot')); }); + test('commands and manual steps must be structured with a result', async () => { + const mod = await importUiProofModule(); + const stringStep = validBundle({ commands_or_manual_steps: ['looks good'] }); + const missingAction = validBundle({ commands_or_manual_steps: [{ result: 'passed' }] }); + const missingResult = validBundle({ commands_or_manual_steps: [{ manual_step: 'Open /example.' 
}] }); + const invalidResult = validBundle({ commands_or_manual_steps: [{ command: 'npm test', result: 'ok' }] }); + + assert.ok(mod.validateUiProofBundle(stringStep).errors.some((error) => error.code === 'invalid_proof_step')); + assert.ok(mod.validateUiProofBundle(missingAction).errors.some((error) => error.code === 'missing_proof_step_action')); + assert.ok(mod.validateUiProofBundle(missingResult).errors.some((error) => error.code === 'missing_proof_step_result')); + assert.ok(mod.validateUiProofBundle(invalidResult).errors.some((error) => error.code === 'invalid_proof_step_result')); + }); + test('observation artifact references must resolve to declared artifacts', async () => { const mod = await importUiProofModule(); const bundle = validBundle(); @@ -2495,6 +2508,20 @@ describe('Phase 57 UI proof validation helper', () => { assert.ok(result.errors.some((error) => error.code === 'unknown_artifact_ref')); }); + test('artifact references must stay workspace-relative or use http URLs', async () => { + const mod = await importUiProofModule(); + const traversal = validBundle(); + traversal.artifacts[0].path = '../../outside/report.html'; + traversal.observations[0].artifact_refs = ['../../outside/report.html']; + const fileUrl = validBundle(); + fileUrl.artifacts[0].url = 'file:///Users/example/private/report.html'; + delete fileUrl.artifacts[0].path; + fileUrl.observations[0].artifact_refs = ['file:///Users/example/private/report.html']; + + assert.ok(mod.validateUiProofBundle(traversal).errors.some((error) => error.code === 'invalid_artifact_ref_location')); + assert.ok(mod.validateUiProofBundle(fileUrl).errors.some((error) => error.code === 'invalid_artifact_ref_location')); + }); + test('observations must include scoped support metadata', async () => { const mod = await importUiProofModule(); const bundle = validBundle(); @@ -2507,6 +2534,14 @@ describe('Phase 57 UI proof validation helper', () => { assert.ok(result.errors.some((error) => error.path === 
'observations[0].artifact_refs')); }); + test('non-object observations fail instead of being skipped', async () => { + const mod = await importUiProofModule(); + const result = mod.validateUiProofBundle(validBundle({ observations: ['looks good'] })); + + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'invalid_observation')); + }); + test('observation privacy and result status are schema-checked', async () => { const mod = await importUiProofModule(); const bundle = validBundle(); @@ -2591,6 +2626,56 @@ describe('Phase 57 UI proof validation helper', () => { assert.ok(result.errors.some((error) => error.code === 'unsupported_claim_use')); }); + test('raw artifact path inference cannot be bypassed with custom type', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle(); + bundle.artifacts[0] = { + ...bundle.artifacts[0], + path: 'artifacts/shot.png', + type: 'custom', + visibility: 'repo_tracked', + safe_to_publish: false, + }; + bundle.observations[0].artifact_refs = ['artifacts/shot.png']; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsafe_raw_artifact')); + }); + + test('public proof claims require matching sanitized privacy metadata', async () => { + const mod = await importUiProofModule(); + const bundle = validBundle({ proof_claim: 'public' }); + bundle.artifacts[0] = { + ...bundle.artifacts[0], + visibility: 'public', + sensitivity: 'sanitized', + safe_to_publish: true, + }; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsafe_public_proof_privacy')); + assert.ok(result.errors.some((error) => error.code === 'unsafe_public_observation_privacy')); + }); + + test('public raw artifact claims require sanitized artifact sensitivity', async () => { + const mod = await 
importUiProofModule(); + const bundle = validBundle({ proof_claim: 'public' }); + bundle.artifacts[0] = { + ...bundle.artifacts[0], + visibility: 'public', + sensitivity: 'secret', + safe_to_publish: true, + }; + bundle.privacy.raw_artifacts_safe_to_publish = true; + bundle.observations[0].privacy.raw_artifacts_safe_to_publish = true; + + const result = mod.validateUiProofBundle(bundle); + assert.strictEqual(result.valid, false); + assert.ok(result.errors.some((error) => error.code === 'unsafe_public_artifact_sensitivity')); + }); + test('explicitly safe-to-publish proof metadata can support public claims', async () => { const mod = await importUiProofModule(); const bundle = validBundle({ proof_claim: 'public' }); @@ -2600,6 +2685,8 @@ describe('Phase 57 UI proof validation helper', () => { sensitivity: 'sanitized', safe_to_publish: true, }; + bundle.privacy.raw_artifacts_safe_to_publish = true; + bundle.observations[0].privacy.raw_artifacts_safe_to_publish = true; const result = mod.validateUiProofBundle(bundle); assert.strictEqual(result.valid, true, JSON.stringify(result.errors));