From 19e43f09db011a07158f473e1d922ffd7db8df68 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Sat, 30 May 2026 15:00:15 +0000 Subject: [PATCH] feat: add interaction-capable browser probe steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upgrade wordpress.browser-actions from the partial navigate/click/fill/press action shape into the full ordered interaction-script contract from issue #310, turning Codebox into a portable WordPress E2E sandbox that can drive the UI and assert browser behavior, not just observe a render. - Declare the backend-agnostic step schema (kinds, validation, evaluate detection) in runtime-core; implement the executor in runtime-playground, keeping the layer boundary clean. - Step kinds: navigate, click, fill, type, press, drag, hover, select, waitFor, evaluate, expect, screenshot, capture — each a 1:1 map onto a stable locator action. Pass the script via steps-json (inline or @path); actions-json is kept as a normalized back-compat alias. - Gate the arbitrary-JS evaluate step behind a dedicated wordpress.browser-actions.evaluate policy capability, separate from the non-JS interaction steps, mirroring the wordpress.run-php posture. - Emit per-step results to files/browser/steps.jsonl (index, kind, selector, ok/fail, timing) and machine-readable expect/evaluate assertions into action-summary.json; support named screenshots; enforce per-step and total-script timeouts with a clean first-failure record. - Recipe-mountable: recipe policy auto-grants the evaluate capability when a browser-actions step opts into an evaluate step. - Document the contract in the README and CLI command catalog; add a step-schema validation unit smoke and extend the browser-actions artifact smoke to a multi-step click/expect/evaluate/screenshot script. Navigate-only behavior is unchanged. 
Closes #310 --- README.md | 23 +- package.json | 1 + packages/cli/src/index.ts | 63 ++- packages/runtime-core/src/index.ts | 216 +++++++- packages/runtime-playground/src/index.ts | 518 ++++++++++++++---- scripts/browser-actions-artifact-smoke.ts | 68 ++- ...ser-interaction-script-validation-smoke.ts | 77 +++ scripts/discovery-command-smoke.ts | 22 +- 8 files changed, 838 insertions(+), 150 deletions(-) create mode 100644 scripts/browser-interaction-script-validation-smoke.ts diff --git a/README.md b/README.md index 5b129c3..905dc4d 100644 --- a/README.md +++ b/README.md @@ -462,7 +462,7 @@ Supported runtime commands today: - `wordpress.wp-cli`: run WP-CLI; accepts `command='wp option get home'` or plain args. - `wordpress.ability`: execute a registered WordPress Ability; accepts `name=` and optional JSON `input=`. - `wordpress.browser-probe`: boot the live preview, visit `url=` with Playwright, and capture generic browser replay/audit evidence under `files/browser/`. -- `wordpress.browser-actions`: boot the live preview, run generic browser interactions, and capture replay/audit evidence under `files/browser/`. +- `wordpress.browser-actions`: boot the live preview, drive it with an ordered interaction script (`steps-json`), assert browser behavior, and capture replay/audit evidence under `files/browser/`. `wordpress.run-php` loads `/wordpress/wp-load.php` by default. Use `--arg bootstrap=none` for raw PHP. @@ -470,7 +470,26 @@ Supported runtime commands today: `wordpress.browser-probe` accepts `wait-for=domcontentloaded|load|networkidle|selector:|duration`, `duration=s`, and `capture=console,errors,html,network,performance,memory,screenshot`. 
It records machine-readable evidence refs such as `files/browser/console.jsonl`, `files/browser/errors.jsonl`, `files/browser/network.jsonl`, `files/browser/performance.json`, `files/browser/memory.json`, `files/browser/checkpoints.jsonl`, `files/browser/snapshot.html`, `files/browser/screenshot.png`, and `files/browser/summary.json` when those captures are enabled. The summary includes requested/final URLs, viewport/device metadata, HTML and screenshot hashes, network event counts, optional final/peak browser memory and performance summaries, and a generic `artifact-backed|partial|diagnostic-only` replayability classification. Performance and memory captures use generic browser/CDP data only: JS heap when available, CDP `Performance.getMetrics`, CDP DOM counters, DOM/resource counts and byte totals, and long task counts/duration. Probe scripts may call `window.__wpCodeboxProbeCheckpoint(name, metadata)` when `performance` or `memory` capture is enabled to record named generic checkpoint snapshots. WP Codebox intentionally keeps these browser evidence fields generic; consumers such as eval harnesses may interpret them without WP Codebox adding scoring, grading, or benchmark semantics. -`wordpress.browser-actions` accepts `actions-json=` with ordered `navigate`, `click`, `fill`, `press`, `wait`, and `capture` actions. `navigate` uses `url` plus optional `waitFor=domcontentloaded|load|networkidle`; `click` uses `selector` or `text`; `fill` uses `selector` and `value`; `press` uses `key` plus optional `selector`; `wait` uses `selector` or `waitFor=domcontentloaded|load|networkidle|duration` with `duration=s|ms`. It records `files/browser/actions.jsonl`, `files/browser/action-summary.json`, and optional `console`, `errors`, `network`, `html`, and `screenshot` captures. Failures identify the failed action index/type in the action log, include serialized browser errors, and still write the requested audit artifacts when possible. 
+`wordpress.browser-actions` drives the preview with an ordered interaction script so Codebox can prove a plugin still *works* under interaction, not just that it renders. Pass the script as `steps-json=<json>` (inline JSON, or `@<path>` to read it from a file); the legacy `actions-json=<json>` shape is still accepted and normalized to steps. Each step is a thin, stable mapping over a Playwright locator action — this is not a test-runner DSL. + +Step kinds: `navigate` (`url`, optional `waitFor=domcontentloaded|load|networkidle`), `click`/`hover` (`selector` or `text`), `fill`/`type` (`selector`, `value`), `press` (`key`, optional `selector`), `drag` (`from` selector, `to` as `{ "selector": ... }` or `{ "x": n, "y": n }`), `select` (`selector`, `value` or `values`), `waitFor` (`selector` or `waitFor=domcontentloaded|load|networkidle|duration|selector:<selector>`), `evaluate` (`expression`, optional `assert` to deep-equal the result), `expect` (`selector`, optional `state=visible|hidden|attached|detached|enabled|disabled|checked|unchecked|editable`), and `screenshot` (optional `name` for a named capture). Every step may set its own `timeout=<n>s`; the command also accepts a global `step-timeout=<n>s` (per step) and `timeout=<n>s` (total-script budget). Both are bounded and deterministic — the run stops cleanly on the first failing step, with no silent partial success. + +The arbitrary-JS `evaluate` step is policy-gated **separately** from the non-JS interaction steps: a script containing `evaluate` requires `wordpress.browser-actions.evaluate` in the runtime policy in addition to `wordpress.browser-actions`. Click/fill/drag/expect and friends never require the extra grant, so a consumer can allow UI driving while still forbidding arbitrary page JS. 
+ +It records `files/browser/steps.jsonl` (per-step index, kind, selector, ok/fail, timing, and any named screenshot), `files/browser/action-summary.json` (with a machine-readable `assertions` block of `total`/`passed`/`failed` plus each `expect`/`evaluate` result), named `files/browser/screenshot-<name>.png` captures, and optional `console`, `errors`, `network`, `html`, and `screenshot` artifacts (capture defaults to `steps,console,errors,network,html,screenshot`; `actions` is accepted as an alias for `steps`). Failures identify the failed step index/kind in `steps.jsonl`, include serialized browser errors, and still write the requested audit artifacts when possible. Existing navigate-only invocations (just `url=`, no `steps-json`) behave exactly as before. + +```jsonc +// steps-json: open the editor, drive the crop modal, assert it still works, capture it +[ + { "kind": "click", "selector": "role=button[name='Social']" }, + { "kind": "waitFor", "selector": ".reactEasyCrop_Container" }, + { "kind": "drag", "from": ".reactEasyCrop_CropArea", "to": { "x": 40, "y": 40 } }, + { "kind": "fill", "selector": "#caption", "value": "smoke test" }, + { "kind": "evaluate", "expression": "document.querySelector('.crop').isConnected", "assert": true }, + { "kind": "expect", "selector": ".crop-confirm", "state": "visible" }, + { "kind": "screenshot", "name": "after-crop" } +] +``` WP Codebox defaults to WordPress `7.0` because the agent and AI plugin stacks need the modern WordPress AI surface. Override with `--wp trunk`, `--wp nightly`, or another supported Playground version. 
diff --git a/package.json b/package.json index d5e1718..ec976f5 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ "recipe-browser-smoke": "tsx scripts/recipe-browser-smoke.ts", "browser-probe-artifact-smoke": "tsx scripts/browser-probe-artifact-smoke.ts", "browser-actions-artifact-smoke": "tsx scripts/browser-actions-artifact-smoke.ts", + "browser-interaction-script-validation-smoke": "tsx scripts/browser-interaction-script-validation-smoke.ts", "preview-port-smoke": "tsx scripts/preview-port-smoke.ts", "preview-options-contract-smoke": "tsx scripts/preview-options-contract-smoke.ts", "preview-public-url-canonical-smoke": "tsx scripts/preview-public-url-canonical-smoke.ts", diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index e6d30c3..ebfc1a3 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -6,7 +6,7 @@ import { tmpdir } from "node:os" import { basename, dirname, join, relative, resolve } from "node:path" import { fileURLToPath } from "node:url" import { promisify } from "node:util" -import { SANDBOX_DMC_PARENT_ONLY_ABILITIES, SANDBOX_DMC_SAFE_ABILITIES, SANDBOX_WORKSPACE_ROOT, calculateArtifactManifestFileSha256, checkWorkspacePolicy, commandRegistry, createRuntime, createWorkspaceRecipeJsonSchema, recipeCommandDefinitions, validateRuntimePolicy, verifyArtifactBundle, type ArtifactBundle, type ArtifactBundleVerificationResult, type ArtifactManifest, type CommandDefinition, type ExecutionResult, type MountSpec, type Runtime, type RuntimeInfo, type RuntimePolicy, type SandboxWorkspaceContract, type SandboxWorkspaceMode, type WorkspacePolicyResult, type WorkspaceRecipe, type WorkspaceRecipeExtraPlugin, type WorkspaceRecipeJsonSchema, type WorkspaceRecipePluginRuntime, type WorkspaceRecipePluginRuntimeHealthProbe, type WorkspaceRecipeSiteSeed, type WorkspaceRecipeStagedFile, type WorkspaceRecipeWorkspace } from "@chubes4/wp-codebox-core" +import { SANDBOX_DMC_PARENT_ONLY_ABILITIES, SANDBOX_DMC_SAFE_ABILITIES, 
SANDBOX_WORKSPACE_ROOT, calculateArtifactManifestFileSha256, checkWorkspacePolicy, commandRegistry, createRuntime, createWorkspaceRecipeJsonSchema, recipeCommandDefinitions, validateBrowserInteractionScript, validateRuntimePolicy, verifyArtifactBundle, type ArtifactBundle, type ArtifactBundleVerificationResult, type ArtifactManifest, type CommandDefinition, type ExecutionResult, type MountSpec, type Runtime, type RuntimeInfo, type RuntimePolicy, type SandboxWorkspaceContract, type SandboxWorkspaceMode, type WorkspacePolicyResult, type WorkspaceRecipe, type WorkspaceRecipeExtraPlugin, type WorkspaceRecipeJsonSchema, type WorkspaceRecipePluginRuntime, type WorkspaceRecipePluginRuntimeHealthProbe, type WorkspaceRecipeSiteSeed, type WorkspaceRecipeStagedFile, type WorkspaceRecipeWorkspace } from "@chubes4/wp-codebox-core" import { createPlaygroundRuntimeBackend } from "@chubes4/wp-codebox-playground" import { agentRuntimeProbeCode, agentSandboxRunCode, resolveSandboxTaskCode } from "./agent-code.js" import { captureStdout, printArtifactVerifyHumanOutput, printBatchHumanOutput, printBlueprintValidateHumanOutput, printBootHumanOutput, printCommandCatalogHumanOutput, printHelp, printHumanOutput, printRecipeHumanOutput, printRecipeSchemaHumanOutput, printRecipeValidateHumanOutput, serializeError } from "./output.js" @@ -3647,6 +3647,48 @@ async function validateRecipeStepArgs(step: WorkspaceRecipe["workflow"]["steps"] return } + if (step.command === "wordpress.browser-actions") { + const stepsJson = recipeStepArgValue(step.args ?? [], "steps-json") + const actionsJson = recipeStepArgValue(step.args ?? [], "actions-json") + const url = recipeStepArgValue(step.args ?? 
[], "url")?.trim() + if (!stepsJson && !actionsJson && !url) { + addIssue("missing-steps", `${path}.args`, "wordpress.browser-actions requires steps-json= (or actions-json=) or url=.") + } + + if (stepsJson && !stepsJson.startsWith("@")) { + let parsed: unknown + try { + parsed = JSON.parse(stepsJson) + } catch (error) { + addIssue("invalid-steps-json", `${path}.args`, `wordpress.browser-actions steps-json must be valid JSON: ${error instanceof Error ? error.message : String(error)}`) + parsed = undefined + } + if (parsed !== undefined) { + const result = validateBrowserInteractionScript(parsed) + for (const issue of result.issues) { + addIssue("invalid-step", `${path}.args`, `wordpress.browser-actions steps-json[${issue.index}]: ${issue.message}`) + } + } + } + + for (const name of ["step-timeout", "timeout"] as const) { + const value = recipeStepArgValue(step.args ?? [], name) + if (value && !/^(\d+(?:\.\d+)?)(ms|s)$/.test(value)) { + addIssue("invalid-duration", `${path}.args`, `wordpress.browser-actions ${name} must look like 500ms or 2s.`) + } + } + + const capture = recipeStepArgValue(step.args ?? [], "capture") + if (capture) { + for (const item of capture.split(",").map((value) => value.trim()).filter(Boolean)) { + if (!["steps", "actions", "console", "errors", "html", "network", "screenshot"].includes(item)) { + addIssue("invalid-capture", `${path}.args`, `wordpress.browser-actions capture does not support: ${item}`) + } + } + } + return + } + if (step.command === "wordpress.ability") { if (!recipeStepArgValue(step.args ?? [], "name")?.trim()) { addIssue("missing-ability-name", `${path}.args`, "wordpress.ability requires name=.") @@ -3949,6 +3991,12 @@ function recipePolicy(recipe: WorkspaceRecipe): RuntimePolicy { if ((recipe.inputs?.siteSeeds ?? 
/**
 * Reports whether a recipe step's inline `steps-json` script contains an
 * `evaluate` step.
 *
 * Only inline JSON is inspected. A missing or empty argument, an `@`-prefixed
 * value, unparseable JSON, and a non-array payload all yield `false`.
 */
function recipeStepUsesEvaluate(step: WorkspaceRecipe["workflow"]["steps"][number]): boolean {
  const scriptArg = recipeStepArgValue(step.args ?? [], "steps-json")
  if (!scriptArg || scriptArg.startsWith("@")) {
    return false
  }

  let decoded: unknown
  try {
    decoded = JSON.parse(scriptArg)
  } catch {
    return false
  }

  if (!Array.isArray(decoded)) {
    return false
  }
  for (const entry of decoded) {
    if (typeof entry === "object" && entry !== null && (entry as { kind?: unknown }).kind === "evaluate") {
      return true
    }
  }
  return false
}
commandRegistry = [ }, { id: "wordpress.browser-actions", - description: "Run generic browser interactions against the live Playground preview and capture replay/audit evidence artifacts.", + description: "Drive the live Playground preview with an ordered interaction script and capture replay/audit evidence artifacts, including per-step results and machine-readable assertions.", acceptedArgs: [ - { name: "url", description: "Initial preview path or absolute URL to visit when actions-json omits an initial navigate action.", format: "path or URL" }, - { name: "actions-json", description: "Ordered browser actions to run: navigate, click, fill, press, wait, and capture.", required: true, format: "JSON array" }, - { name: "capture", description: "Comma-separated artifacts to capture after interactions.", format: "actions,console,errors,html,network,screenshot" }, + { name: "url", description: "Initial preview path or absolute URL to visit when the script omits an initial navigate step.", format: "path or URL" }, + { name: "steps-json", description: "Ordered interaction script: navigate, click, fill, type, press, drag, hover, select, waitFor, evaluate, expect, screenshot, and capture steps.", format: "JSON array (inline or @)" }, + { name: "actions-json", description: "Back-compat alias for steps-json accepting the legacy navigate/click/fill/press/wait/capture action shape.", format: "JSON array" }, + { name: "step-timeout", description: "Per-step timeout applied to each interaction step.", format: "duration, e.g. 5s or 500ms" }, + { name: "timeout", description: "Total-script timeout bounding the whole interaction run.", format: "duration, e.g. 
30s or 1500ms" }, + { name: "capture", description: "Comma-separated artifacts to capture after interactions.", format: "steps,console,errors,html,network,screenshot" }, ], - outputShape: "JSON summary plus files/browser/actions.jsonl, action-summary.json, and optional console/errors/network/html/screenshot artifacts.", - policyRequirement: "Runtime policy commands must include wordpress.browser-actions.", + outputShape: "JSON summary plus files/browser/steps.jsonl, action-summary.json (with assertions pass/fail), named screenshots, and optional console/errors/network/html/screenshot artifacts.", + policyRequirement: "Runtime policy commands must include wordpress.browser-actions. The evaluate step additionally requires wordpress.browser-actions.evaluate.", recipe: true, handler: { kind: "playground", method: "runBrowserActions" }, }, + { + id: "wordpress.browser-actions.evaluate", + description: "Policy capability gating arbitrary page-side JavaScript (the evaluate step) inside wordpress.browser-actions. Non-JS interaction steps do not require this capability.", + acceptedArgs: [], + outputShape: "Policy-only capability; not directly executable. 
Grant alongside wordpress.browser-actions to permit evaluate steps.", + policyRequirement: "Runtime policy commands must include wordpress.browser-actions.evaluate to run evaluate steps.", + recipe: false, + handler: { kind: "recipe-alias", command: "wordpress.browser-actions" }, + }, { id: "wp-codebox.agent-runtime-probe", description: "Recipe-only probe that boots Agents API, Data Machine, and Data Machine Code and verifies the stack loads.", @@ -1464,6 +1477,13 @@ export interface ArtifactReviewBrowserSummary { errorsFile?: string actions?: string actionCount?: number + steps?: string + stepCount?: number + assertions?: { + total: number + passed: number + failed: number + } summaryFile?: string }> } @@ -2402,6 +2422,190 @@ export interface RuntimeBrowserAction { timeout_ms?: number } +/** + * Backend-agnostic browser interaction step contract (issue #310). + * + * runtime-core declares the schema; a runtime backend (e.g. runtime-playground) + * implements the executor that maps each step onto its driver. Steps are a thin, + * stable mapping over locator-style actions — not a test-runner DSL. + * + * Layer purity: this type knows nothing about Playwright or Playground. It is the + * shared contract any backend can satisfy. + */ +export const BROWSER_INTERACTION_STEP_KINDS = [ + "navigate", + "click", + "fill", + "type", + "press", + "drag", + "hover", + "select", + "waitFor", + "evaluate", + "expect", + "screenshot", + "capture", +] as const + +export type BrowserInteractionStepKind = typeof BROWSER_INTERACTION_STEP_KINDS[number] + +/** Locator/element state checked by an `expect` step. */ +export const BROWSER_INTERACTION_EXPECT_STATES = ["visible", "hidden", "attached", "detached", "enabled", "disabled", "checked", "unchecked", "editable"] as const + +export type BrowserInteractionExpectState = typeof BROWSER_INTERACTION_EXPECT_STATES[number] + +/** Drop target for a `drag` step: an element selector or absolute viewport coordinates. 
/** Drop target for a `drag` step: an element selector or absolute viewport coordinates. */
export type BrowserInteractionDragTarget = { selector: string } | { x: number; y: number }

/**
 * One step of an ordered browser interaction script.
 *
 * Every field except `kind` is optional at the type level; which fields a
 * given kind actually requires is enforced by `validateBrowserInteractionScript`.
 */
export interface BrowserInteractionStep {
  kind: BrowserInteractionStepKind
  /** Stable locator string (CSS, `text=`, `role=button[name='...']`, etc.). */
  selector?: string
  /** Navigation target for `navigate`. */
  url?: string
  /** Visible-text locator shortcut for `click`/`hover`. */
  text?: string
  /** Input value for `fill`/`type`, or option value for `select`. */
  value?: string
  /** Keyboard key for `press`. */
  key?: string
  /** Wait/load condition: domcontentloaded|load|networkidle|selector:<selector>|duration. */
  waitFor?: string
  /** Drag source selector for `drag`. */
  from?: string
  /** Drag drop target for `drag`. */
  to?: BrowserInteractionDragTarget
  /** Option label/value(s) for `select`. */
  values?: string[]
  /** Arbitrary page JS for `evaluate` (policy-gated separately). */
  expression?: string
  /** Optional expected value an `evaluate` result must deep-equal to assert. */
  assert?: unknown
  /** Expected locator state for `expect`. */
  state?: BrowserInteractionExpectState
  /** Optional screenshot name for `screenshot`. */
  name?: string
  /** Optional wait duration (e.g. 500ms, 2s) for `waitFor`/`navigate`. */
  duration?: string
  /** Per-step timeout override (e.g. 5s). */
  timeout?: string
}

/** A single validation problem; `index` is the step position, or -1 for a script-level problem. */
export interface BrowserInteractionStepValidationIssue {
  index: number
  message: string
}

/** Outcome of validating a script: the steps that had a recognised kind, plus every issue found. */
export interface BrowserInteractionScriptValidationResult {
  valid: boolean
  steps: BrowserInteractionStep[]
  issues: BrowserInteractionStepValidationIssue[]
}

/** Duration literal accepted for per-step `timeout`/`duration`: a number followed by `ms` or `s`. */
const BROWSER_INTERACTION_DURATION_PATTERN = /^\d+(?:\.\d+)?(?:ms|s)$/

/**
 * Type guard for a `drag` step's `to` field.
 *
 * Accepts `{ selector }` with a non-empty string, or `{ x, y }` where both are
 * finite numbers. NaN and Infinity are rejected because they are not usable
 * viewport coordinates.
 */
function isBrowserInteractionDragTarget(value: unknown): value is BrowserInteractionDragTarget {
  if (!isPlainObject(value)) return false
  if (typeof value.selector === "string" && value.selector.length > 0) return true
  return typeof value.x === "number" && Number.isFinite(value.x) && typeof value.y === "number" && Number.isFinite(value.y)
}

/**
 * Validate an ordered browser interaction script against the backend-agnostic
 * step contract.
 *
 * @param input Parsed JSON of unknown shape; anything that is not an array
 *   yields a single script-level issue with `index: -1`.
 * @returns `steps` holds every entry that is an object with a recognised
 *   `kind`, including entries that produced field-level issues. `issues`
 *   lists each problem with the index of the offending step, and `valid` is
 *   true only when `issues` is empty. Step objects are returned by reference,
 *   not copied.
 */
export function validateBrowserInteractionScript(input: unknown): BrowserInteractionScriptValidationResult {
  const issues: BrowserInteractionStepValidationIssue[] = []
  const steps: BrowserInteractionStep[] = []

  if (!Array.isArray(input)) {
    return { valid: false, steps, issues: [{ index: -1, message: "browser interaction script must be a JSON array of steps" }] }
  }

  const isNonEmptyString = (value: unknown): value is string => typeof value === "string" && value.length > 0

  input.forEach((raw: unknown, index: number) => {
    if (!isPlainObject(raw)) {
      issues.push({ index, message: "step must be an object" })
      return
    }

    const kind = raw.kind
    if (typeof kind !== "string" || !(BROWSER_INTERACTION_STEP_KINDS as readonly string[]).includes(kind)) {
      issues.push({ index, message: `step kind must be one of ${BROWSER_INTERACTION_STEP_KINDS.join(", ")}` })
      return
    }

    // The static type is only a view over untrusted JSON: every field that is
    // read below is re-checked at runtime before it is trusted.
    const step = raw as unknown as BrowserInteractionStep
    const hasSelector = isNonEmptyString(step.selector)
    const hasText = isNonEmptyString(step.text)

    switch (kind as BrowserInteractionStepKind) {
      case "navigate":
        if (typeof step.url !== "string" || step.url.trim().length === 0) {
          issues.push({ index, message: "navigate step requires url" })
        }
        break
      case "click":
      case "hover":
        if (!hasSelector && !hasText) {
          issues.push({ index, message: `${kind} step requires selector or text` })
        }
        break
      case "fill":
      case "type":
        if (!hasSelector) issues.push({ index, message: `${kind} step requires selector` })
        if (typeof step.value !== "string") issues.push({ index, message: `${kind} step requires value` })
        break
      case "press":
        if (!isNonEmptyString(step.key)) {
          issues.push({ index, message: "press step requires key" })
        }
        break
      case "drag":
        if (!isNonEmptyString(step.from)) {
          issues.push({ index, message: "drag step requires from selector" })
        }
        if (!isBrowserInteractionDragTarget(step.to)) {
          issues.push({ index, message: "drag step requires to as { selector } or { x, y }" })
        }
        break
      case "select": {
        if (!hasSelector) issues.push({ index, message: "select step requires selector" })
        const optionValues: unknown = step.values
        if (optionValues !== undefined) {
          // An array is not enough: each entry is handed to the driver as an option value.
          if (!Array.isArray(optionValues) || !optionValues.every((entry) => typeof entry === "string")) {
            issues.push({ index, message: "select step values must be an array of strings" })
          }
        } else if (typeof step.value !== "string") {
          issues.push({ index, message: "select step requires value or values" })
        }
        break
      }
      case "waitFor":
        // An empty waitFor string names no condition, so it does not satisfy the requirement.
        if (!hasSelector && !isNonEmptyString(step.waitFor)) {
          issues.push({ index, message: "waitFor step requires selector or waitFor condition" })
        }
        break
      case "evaluate":
        if (typeof step.expression !== "string" || step.expression.trim().length === 0) {
          issues.push({ index, message: "evaluate step requires expression" })
        }
        break
      case "expect":
        if (!hasSelector) issues.push({ index, message: "expect step requires selector" })
        if (step.state !== undefined && !(BROWSER_INTERACTION_EXPECT_STATES as readonly string[]).includes(step.state)) {
          issues.push({ index, message: `expect step state must be one of ${BROWSER_INTERACTION_EXPECT_STATES.join(", ")}` })
        }
        break
      case "screenshot":
      case "capture":
        break
    }

    // Duration-shaped fields apply to any kind; reject values such as a bare
    // number (5000) or a unit-less string before a backend tries to parse them.
    for (const field of ["timeout", "duration"] as const) {
      const candidate: unknown = step[field]
      if (candidate !== undefined && !(typeof candidate === "string" && BROWSER_INTERACTION_DURATION_PATTERN.test(candidate))) {
        issues.push({ index, message: `${kind} step ${field} must look like 500ms or 2s` })
      }
    }

    steps.push(step)
  })

  return { valid: issues.length === 0, steps, issues }
}
/**
 * Reports whether a script contains at least one `evaluate` step.
 *
 * @param steps Steps to scan.
 * @returns `true` as soon as an `evaluate` step is found, otherwise `false`
 *   (including for an empty script).
 */
export function browserInteractionScriptUsesEvaluate(steps: readonly BrowserInteractionStep[]): boolean {
  for (const { kind } of steps) {
    if (kind === "evaluate") {
      return true
    }
  }
  return false
}
globalThis.__wpCodeboxBrowserProbe || { checkpoints: [], longTasks: [] }; @@ -558,6 +560,7 @@ interface BrowserProbeArtifact { url: string files: { actions?: string + steps?: string checkpoints?: string console?: string errors?: string @@ -570,6 +573,8 @@ interface BrowserProbeArtifact { } summary: { actions?: number + steps?: number + assertions?: BrowserAssertionsSummary consoleMessages: number errors: number finalUrl: string @@ -585,6 +590,13 @@ interface BrowserProbeArtifact { } } +interface BrowserAssertionsSummary { + total: number + passed: number + failed: number + results: BrowserStepAssertion[] +} + interface BrowserProbeMetricDigest { final: number | null peak: number | null @@ -686,22 +698,36 @@ interface BrowserProbeViewport { type BrowserProbeReplayability = "artifact-backed" | "partial" | "diagnostic-only" -interface BrowserActionRecord { +interface BrowserStepRecord { index: number - type: string + kind: string status: "ok" | "failed" startedAt: string finishedAt: string + durationMs: number url?: string selector?: string text?: string key?: string waitFor?: string duration?: string + /** Machine-readable assertion outcome for expect/evaluate steps. */ + assertion?: BrowserStepAssertion + screenshot?: string finalUrl?: string error?: BrowserProbeErrorRecord } +interface BrowserStepAssertion { + kind: "expect" | "evaluate" + selector?: string + state?: string + expression?: string + expected?: unknown + actual?: unknown + passed: boolean +} + interface PluginCheckArtifact { targetPlugin: string files: { @@ -1518,40 +1544,55 @@ class PlaygroundRuntime implements Runtime { private async runBrowserActions(spec: ExecutionSpec): Promise { const server = await this.bootPlayground() const args = spec.args ?? 
[] - const actions = browserActionsFromArgs(args) + const steps = await this.browserInteractionStepsFromArgs(args) const initialUrl = argValue(args, "url")?.trim() - if (actions.length === 0 && !initialUrl) { - throw new Error("wordpress.browser-actions requires actions-json= or url=") + if (steps.length === 0 && !initialUrl) { + throw new Error("wordpress.browser-actions requires steps-json= (or actions-json=) or url=") + } + + if (initialUrl && steps[0]?.kind !== "navigate") { + steps.unshift({ kind: "navigate", url: initialUrl }) } - if (initialUrl && actions[0]?.type !== "navigate") { - actions.unshift({ type: "navigate", url: initialUrl }) + // evaluate (arbitrary page JS) is gated by a dedicated policy capability, + // mirroring how wordpress.run-php is gated. Non-JS interaction steps are + // allowed whenever wordpress.browser-actions itself is allowed. + if (browserInteractionScriptUsesEvaluate(steps)) { + assertRuntimeCommandAllowed("wordpress.browser-actions.evaluate", this.spec.policy) } const capture = new Set(commaListArg(args, "capture")) if (capture.size === 0) { - capture.add("actions") + capture.add("steps") capture.add("console") capture.add("errors") capture.add("network") capture.add("html") capture.add("screenshot") } + // Back-compat: "actions" remains an alias for the per-step timeline capture. 
+ if (capture.has("actions")) { + capture.delete("actions") + capture.add("steps") + } for (const item of capture) { - if (!["actions", "console", "errors", "html", "network", "screenshot"].includes(item)) { - throw new Error(`wordpress.browser-actions capture supports actions, console, errors, html, network, screenshot: ${item}`) + if (!["steps", "console", "errors", "html", "network", "screenshot"].includes(item)) { + throw new Error(`wordpress.browser-actions capture supports steps, console, errors, html, network, screenshot: ${item}`) } } + const stepTimeoutMs = durationArg(args, "step-timeout", BROWSER_STEP_DEFAULT_TIMEOUT_MS) + const totalTimeoutMs = durationArg(args, "timeout", BROWSER_SCRIPT_DEFAULT_TIMEOUT_MS) + const browserDirectory = join(this.artifactRoot, "files", "browser") await mkdir(browserDirectory, { recursive: true }) - const actionRecords: BrowserActionRecord[] = [] + const stepRecords: BrowserStepRecord[] = [] const consoleMessages: Record[] = [] const errors: BrowserProbeErrorRecord[] = [] const network: BrowserProbeNetworkRecord[] = [] - const actionsPath = join(browserDirectory, "actions.jsonl") + const stepsPath = join(browserDirectory, "steps.jsonl") const consolePath = join(browserDirectory, "console.jsonl") const errorsPath = join(browserDirectory, "errors.jsonl") const htmlPath = join(browserDirectory, "snapshot.html") @@ -1559,6 +1600,7 @@ class PlaygroundRuntime implements Runtime { const screenshotPath = join(browserDirectory, "screenshot.png") const summaryPath = join(browserDirectory, "action-summary.json") const startedAt = now() + const startedAtMs = Date.now() const { chromium } = await import("playwright") const browser = await chromium.launch() let requestedUrl = initialUrl ? 
resolveBrowserProbeUrl(initialUrl, server.serverUrl) : server.serverUrl @@ -1582,19 +1624,37 @@ class PlaygroundRuntime implements Runtime { page.on("requestfailed", (request) => network.push(serializeBrowserRequestFailure(request))) } - for (const [index, action] of actions.entries()) { + for (const [index, step] of steps.entries()) { const recordStartedAt = now() + const recordStartedAtMs = Date.now() + // Total-script timeout: stop before starting a step that would exceed the budget. + if (totalTimeoutMs > 0 && recordStartedAtMs - startedAtMs >= totalTimeoutMs) { + const timeoutError = new Error(`wordpress.browser-actions exceeded total timeout of ${totalTimeoutMs}ms before step ${index} (${step.kind})`) + const serialized = serializeBrowserError("probe-error", timeoutError) + errors.push(serialized) + stepRecords.push(browserStepRecord(index, step, "failed", recordStartedAt, recordStartedAtMs, page.url(), { error: serialized })) + pendingError = timeoutError + break + } try { - await executeBrowserAction(page, action, server.serverUrl) + const outcome = await executeBrowserInteractionStep(page, step, server.serverUrl, stepTimeoutMs, screenshotPath, browserDirectory) finalUrl = page.url() - if (action.type === "navigate") { - requestedUrl = resolveBrowserActionUrl(action, server.serverUrl) + if (step.kind === "navigate") { + requestedUrl = resolveBrowserProbeUrl((step.url ?? "").trim(), server.serverUrl) + } + if (outcome.screenshot && capture.has("screenshot") && outcome.screenshotIsDefault) { + screenshotSha256 = await fileSha256(screenshotPath) + } + stepRecords.push(browserStepRecord(index, step, "ok", recordStartedAt, recordStartedAtMs, finalUrl, outcome)) + // A failed expect/evaluate assertion is a clean step failure: no silent partial success. 
+ if (outcome.assertion && !outcome.assertion.passed) { + pendingError = new Error(`wordpress.browser-actions ${step.kind} assertion failed at step ${index}`) + break } - actionRecords.push(browserActionRecord(index, action, "ok", recordStartedAt, finalUrl)) } catch (error) { const serialized = serializeBrowserError("probe-error", error) errors.push(serialized) - actionRecords.push(browserActionRecord(index, action, "failed", recordStartedAt, page.url(), serialized)) + stepRecords.push(browserStepRecord(index, step, "failed", recordStartedAt, recordStartedAtMs, page.url(), { error: serialized })) pendingError = error instanceof Error ? error : new Error(String(error)) break } @@ -1612,8 +1672,8 @@ class PlaygroundRuntime implements Runtime { } } finally { await browser.close() - if (capture.has("actions")) { - await writeFile(actionsPath, jsonLines(actionRecords)) + if (capture.has("steps")) { + await writeFile(stepsPath, jsonLines(stepRecords)) } if (capture.has("console")) { await writeFile(consolePath, jsonLines(consoleMessages)) @@ -1625,11 +1685,12 @@ class PlaygroundRuntime implements Runtime { await writeFile(networkPath, jsonLines(network)) } + const assertions = browserAssertionsSummary(stepRecords) const artifact: BrowserProbeArtifact = { requestedUrl, url: requestedUrl, files: { - ...(capture.has("actions") ? { actions: "files/browser/actions.jsonl" } : {}), + ...(capture.has("steps") ? { steps: "files/browser/steps.jsonl" } : {}), ...(capture.has("console") ? { console: "files/browser/console.jsonl" } : {}), ...(capture.has("errors") ? { errors: "files/browser/errors.jsonl" } : {}), ...(capture.has("html") ? { html: "files/browser/snapshot.html" } : {}), @@ -1638,7 +1699,9 @@ class PlaygroundRuntime implements Runtime { summary: "files/browser/action-summary.json", }, summary: { - actions: actionRecords.length, + actions: stepRecords.length, + steps: stepRecords.length, + ...(assertions.total > 0 ? 
{ assertions } : {}), consoleMessages: consoleMessages.length, errors: errors.length, finalUrl, @@ -1655,7 +1718,10 @@ class PlaygroundRuntime implements Runtime { requestedUrl, finalUrl, capture: [...capture].sort(), - actions: actionRecords, + stepTimeoutMs, + totalTimeoutMs, + steps: stepRecords, + ...(assertions.total > 0 ? { assertions } : {}), startedAt, finishedAt: now(), files: artifact.files, @@ -1669,7 +1735,7 @@ class PlaygroundRuntime implements Runtime { } if (pendingError) { - throw new Error(`wordpress.browser-actions failed after ${actionRecords.length} action(s): ${pendingError.message}`) + throw new Error(`wordpress.browser-actions failed after ${stepRecords.length} step(s): ${pendingError.message}`) } return `${JSON.stringify({ @@ -1678,10 +1744,39 @@ class PlaygroundRuntime implements Runtime { finalUrl: this.browserProbes.at(-1)?.summary.finalUrl ?? finalUrl, files: this.browserProbes.at(-1)?.files, summary: this.browserProbes.at(-1)?.summary, - actions: actionRecords, + steps: stepRecords, }, null, 2)}\n` } + private async browserInteractionStepsFromArgs(args: string[]): Promise { + const stepsRaw = argValue(args, "steps-json") + if (typeof stepsRaw === "string" && stepsRaw.trim().length > 0) { + const parsed = await this.parseBrowserStepsPayload(stepsRaw.trim(), "steps-json") + const result = validateBrowserInteractionScript(parsed) + if (!result.valid) { + throw new Error(`wordpress.browser-actions steps-json is invalid: ${result.issues.map((issue) => `[${issue.index}] ${issue.message}`).join("; ")}`) + } + return result.steps + } + + // Back-compat: accept the legacy actions-json shape and normalize it to steps. 
+ const legacy = browserActionsFromArgs(args) + return legacy.map(normalizeLegacyBrowserAction) + } + + private async parseBrowserStepsPayload(raw: string, name: string): Promise { + let text = raw + if (raw.startsWith("@")) { + const path = raw.slice(1) + text = await readFile(resolve(path), "utf8") + } + try { + return JSON.parse(text) + } catch (error) { + throw new Error(`${name} must be valid JSON: ${error instanceof Error ? error.message : String(error)}`) + } + } + private async runPhp(spec: ExecutionSpec): Promise { const server = await this.bootPlayground() const code = await this.phpCodeFromArgs(spec.args ?? []) @@ -2593,8 +2688,11 @@ echo wp_json_encode( array( checkpoints: probe.files.checkpoints, errorsFile: probe.files.errors, memory: probe.files.memory, - actions: probe.files.actions, - actionCount: probe.summary.actions, + actions: probe.files.steps ?? probe.files.actions, + actionCount: probe.summary.steps ?? probe.summary.actions, + steps: probe.files.steps, + stepCount: probe.summary.steps, + ...(probe.summary.assertions ? 
{ assertions: { total: probe.summary.assertions.total, passed: probe.summary.assertions.passed, failed: probe.summary.assertions.failed } } : {}), performance: probe.files.performance, summaryFile: probe.files.summary, })), @@ -2608,6 +2706,9 @@ echo wp_json_encode( array( const files = new Map() for (const probe of this.browserProbes) { + if (probe.files.steps) { + files.set(probe.files.steps, { kind: "browser-steps", contentType: "application/x-ndjson" }) + } if (probe.files.actions) { files.set(probe.files.actions, { kind: "browser-actions", contentType: "application/x-ndjson" }) } @@ -2658,7 +2759,7 @@ echo wp_json_encode( array( private async redactBrowserArtifacts(redactor: ArtifactRedactor): Promise { for (const probe of this.browserProbes) { - for (const path of [probe.files.actions, probe.files.checkpoints, probe.files.console, probe.files.errors, probe.files.html, probe.files.memory, probe.files.network, probe.files.performance, probe.files.summary]) { + for (const path of [probe.files.steps, probe.files.actions, probe.files.checkpoints, probe.files.console, probe.files.errors, probe.files.html, probe.files.memory, probe.files.network, probe.files.performance, probe.files.summary]) { if (!path) { continue } @@ -2767,145 +2868,324 @@ async function navigateBrowserProbe(page: Page, url: string, waitFor: string, du throw new Error(`wordpress.browser-probe wait-for supports domcontentloaded, load, networkidle, selector:, duration: ${waitFor}`) } -type BrowserActionInput = Record & { type: string } - -function browserActionsFromArgs(args: string[]): BrowserActionInput[] { - return jsonArrayArg(args, "actions-json").map((action, index) => { - if (!action || typeof action !== "object" || Array.isArray(action)) { - throw new Error(`wordpress.browser-actions actions-json[${index}] must be an object`) - } - const typedAction = action as BrowserActionInput - if (typeof typedAction.type !== "string" || typedAction.type.length === 0) { - throw new 
Error(`wordpress.browser-actions actions-json[${index}].type is required`) - } - return typedAction - }) +interface BrowserStepOutcome { + assertion?: BrowserStepAssertion + screenshot?: string + screenshotIsDefault?: boolean + error?: BrowserProbeErrorRecord } -async function executeBrowserAction(page: Page, action: BrowserActionInput, baseUrl: string): Promise { - if (action.type === "navigate") { - await page.goto(resolveBrowserActionUrl(action, baseUrl), { waitUntil: browserActionLoadState(action.waitFor) }) - return - } - - if (action.type === "click") { - if (typeof action.selector === "string" && action.selector.length > 0) { - await page.click(action.selector) - return +/** + * Execute a single backend-agnostic interaction step against a Playwright page. + * Each non-evaluate kind maps 1:1 to a stable locator action. evaluate is the only + * arbitrary-JS escape hatch and is policy-gated by the caller before this runs. + */ +async function executeBrowserInteractionStep( + page: Page, + step: BrowserInteractionStep, + baseUrl: string, + stepTimeoutMs: number, + defaultScreenshotPath: string, + browserDirectory: string, +): Promise { + const timeout = browserStepTimeoutMs(step, stepTimeoutMs) + + switch (step.kind) { + case "navigate": { + const url = resolveBrowserProbeUrl((step.url ?? "").trim(), baseUrl) + await page.goto(url, { waitUntil: browserActionLoadState(step.waitFor), timeout }) + return {} + } + case "click": { + await browserStepLocator(page, step).click({ timeout }) + return {} + } + case "hover": { + await browserStepLocator(page, step).hover({ timeout }) + return {} + } + case "fill": { + await page.locator(requireSelector(step, "fill")).fill(String(step.value ?? ""), { timeout }) + return {} + } + case "type": { + const locator = page.locator(requireSelector(step, "type")) + await locator.click({ timeout }) + await locator.pressSequentially(String(step.value ?? 
""), { timeout }) + return {} + } + case "press": { + const key = String(step.key ?? "") + if (typeof step.selector === "string" && step.selector.length > 0) { + await page.locator(step.selector).press(key, { timeout }) + } else { + await page.keyboard.press(key) + } + return {} + } + case "drag": { + const source = page.locator(requireFrom(step)) + if (step.to && "selector" in step.to) { + await source.dragTo(page.locator(step.to.selector), { timeout }) + } else if (step.to) { + const box = await source.boundingBox({ timeout }) + const startX = box ? box.x + box.width / 2 : 0 + const startY = box ? box.y + box.height / 2 : 0 + await page.mouse.move(startX, startY) + await page.mouse.down() + await page.mouse.move(step.to.x, step.to.y, { steps: 8 }) + await page.mouse.up() + } + return {} + } + case "select": { + const locator = page.locator(requireSelector(step, "select")) + const values = Array.isArray(step.values) ? step.values : [String(step.value ?? "")] + await locator.selectOption(values, { timeout }) + return {} + } + case "waitFor": { + await browserStepWaitFor(page, step, timeout) + return {} + } + case "evaluate": { + const result = await page.evaluate(async (source) => { + // Support both a bare expression ("a.b.c") and a multi-statement body + // that returns explicitly. If the source already returns, run it as a + // body; otherwise evaluate it as an expression and return its value. + const body = /(^|[^.\w])return[\s(;]/.test(source) ? source : `return (\n${source}\n)` + const run = new Function(`return (async () => {\n${body}\n})()`) + return run() + }, String(step.expression ?? 
"")) + if (Object.prototype.hasOwnProperty.call(step, "assert")) { + const passed = browserDeepEqual(result, step.assert) + return { + assertion: { kind: "evaluate", expression: step.expression, expected: step.assert, actual: result, passed }, + } + } + return {} } - if (typeof action.text === "string" && action.text.length > 0) { - await page.getByText(action.text).click() - return + case "expect": { + const selector = requireSelector(step, "expect") + const state = step.state ?? "visible" + const passed = await browserExpectState(page, selector, state, timeout) + return { assertion: { kind: "expect", selector, state, passed } } } - throw new Error("wordpress.browser-actions click requires selector or text") - } - - if (action.type === "fill") { - if (typeof action.selector !== "string" || action.selector.length === 0) { - throw new Error("wordpress.browser-actions fill requires selector") - } - if (typeof action.value !== "string") { - throw new Error("wordpress.browser-actions fill requires value") + case "screenshot": { + const path = typeof step.name === "string" && step.name.length > 0 + ? join(browserDirectory, `screenshot-${sanitizeScreenshotName(step.name)}.png`) + : defaultScreenshotPath + await page.screenshot({ path, fullPage: true }) + const isDefault = path === defaultScreenshotPath + return { + screenshot: isDefault ? 
"files/browser/screenshot.png" : `files/browser/${basename(path)}`, + screenshotIsDefault: isDefault, + } } - await page.fill(action.selector, action.value) - return + case "capture": + return {} } - if (action.type === "press") { - if (typeof action.key !== "string" || action.key.length === 0) { - throw new Error("wordpress.browser-actions press requires key") - } - if (typeof action.selector === "string" && action.selector.length > 0) { - await page.press(action.selector, action.key) - return - } - await page.keyboard.press(action.key) - return - } + throw new Error(`wordpress.browser-actions step kind is not supported: ${step.kind}`) +} - if (action.type === "wait") { - await waitForBrowserAction(page, action) - return +function browserStepLocator(page: Page, step: BrowserInteractionStep) { + if (typeof step.selector === "string" && step.selector.length > 0) { + return page.locator(step.selector) } - - if (action.type === "capture") { - return + if (typeof step.text === "string" && step.text.length > 0) { + return page.getByText(step.text) } - - throw new Error(`wordpress.browser-actions action type is not supported: ${action.type}`) + throw new Error(`wordpress.browser-actions ${step.kind} requires selector or text`) } -function resolveBrowserActionUrl(action: BrowserActionInput, baseUrl: string): string { - if (typeof action.url !== "string" || action.url.trim().length === 0) { - throw new Error("wordpress.browser-actions navigate requires url") +function requireSelector(step: BrowserInteractionStep, kind: string): string { + if (typeof step.selector !== "string" || step.selector.length === 0) { + throw new Error(`wordpress.browser-actions ${kind} requires selector`) } - return resolveBrowserProbeUrl(action.url.trim(), baseUrl) + return step.selector } -function browserActionLoadState(waitFor: unknown): "domcontentloaded" | "load" | "networkidle" { - if (waitFor === undefined || waitFor === null || waitFor === "") { - return "domcontentloaded" - } - if 
(waitFor === "domcontentloaded" || waitFor === "load" || waitFor === "networkidle") { - return waitFor +function requireFrom(step: BrowserInteractionStep): string { + if (typeof step.from !== "string" || step.from.length === 0) { + throw new Error("wordpress.browser-actions drag requires from selector") } - throw new Error(`wordpress.browser-actions navigate waitFor supports domcontentloaded, load, networkidle: ${waitFor}`) + return step.from } -async function waitForBrowserAction(page: Page, action: BrowserActionInput): Promise { - if (typeof action.selector === "string" && action.selector.length > 0) { - await page.waitForSelector(action.selector) +async function browserStepWaitFor(page: Page, step: BrowserInteractionStep, timeout: number): Promise { + if (typeof step.selector === "string" && step.selector.length > 0) { + await page.locator(step.selector).waitFor({ timeout }) return } - - const waitFor = typeof action.waitFor === "string" ? action.waitFor : "load" + const waitFor = typeof step.waitFor === "string" ? 
step.waitFor : "load" if (waitFor === "domcontentloaded" || waitFor === "load" || waitFor === "networkidle") { await page.waitForLoadState(waitFor) return } if (waitFor === "duration") { - await page.waitForTimeout(browserActionDurationMs(action)) + await page.waitForTimeout(durationStringMs(step.duration)) return } + if (waitFor.startsWith("selector:")) { + await page.locator(waitFor.slice("selector:".length)).waitFor({ timeout }) + return + } + throw new Error(`wordpress.browser-actions waitFor supports selector, domcontentloaded, load, networkidle, duration, selector:: ${waitFor}`) +} + +async function browserExpectState(page: Page, selector: string, state: string, timeout: number): Promise { + const locator = page.locator(selector) + try { + switch (state) { + case "visible": + case "hidden": + case "attached": + case "detached": + await locator.waitFor({ state, timeout }) + return true + case "enabled": + await locator.waitFor({ state: "visible", timeout }) + return await locator.isEnabled() + case "disabled": + await locator.waitFor({ state: "visible", timeout }) + return await locator.isDisabled() + case "checked": + await locator.waitFor({ state: "visible", timeout }) + return await locator.isChecked() + case "unchecked": + await locator.waitFor({ state: "visible", timeout }) + return !(await locator.isChecked()) + case "editable": + await locator.waitFor({ state: "visible", timeout }) + return await locator.isEditable() + default: + return false + } + } catch { + return false + } +} - throw new Error(`wordpress.browser-actions wait supports selector, domcontentloaded, load, networkidle, duration: ${waitFor}`) +function browserStepTimeoutMs(step: BrowserInteractionStep, fallbackMs: number): number { + if (typeof step.timeout === "string" && step.timeout.length > 0) { + return durationStringMs(step.timeout) + } + return fallbackMs } -function browserActionDurationMs(action: BrowserActionInput): number { - const raw = typeof action.duration === "string" ? 
action.duration : "0ms" +function durationStringMs(raw: string | undefined): number { + if (!raw) { + return 0 + } const match = raw.trim().match(/^(\d+(?:\.\d+)?)(ms|s)$/) if (!match) { - throw new Error("wordpress.browser-actions duration must be a duration like 500ms or 2s") + throw new Error(`wordpress.browser-actions duration must be a duration like 500ms or 2s: ${raw}`) } const value = Number.parseFloat(match[1]) return Math.max(0, Math.round(match[2] === "ms" ? value : value * 1000)) } -function browserActionRecord( +function sanitizeScreenshotName(name: string): string { + return name.replace(/[^a-zA-Z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80) || "step" +} + +function browserDeepEqual(a: unknown, b: unknown): boolean { + return stableStringify(a) === stableStringify(b) +} + +function stableStringify(value: unknown): string { + if (value === null || typeof value !== "object") { + return JSON.stringify(value) ?? "null" + } + if (Array.isArray(value)) { + return `[${value.map(stableStringify).join(",")}]` + } + const keys = Object.keys(value as Record).sort() + return `{${keys.map((key) => `${JSON.stringify(key)}:${stableStringify((value as Record)[key])}`).join(",")}}` +} + +function browserStepRecord( index: number, - action: BrowserActionInput, - status: BrowserActionRecord["status"], + step: BrowserInteractionStep, + status: BrowserStepRecord["status"], startedAt: string, + startedAtMs: number, finalUrl: string, - error?: BrowserProbeErrorRecord, -): BrowserActionRecord { + outcome: BrowserStepOutcome, +): BrowserStepRecord { return { index, - type: action.type, + kind: step.kind, status, startedAt, finishedAt: now(), - ...(typeof action.url === "string" ? { url: action.url } : {}), - ...(typeof action.selector === "string" ? { selector: action.selector } : {}), - ...(typeof action.text === "string" ? { text: action.text } : {}), - ...(typeof action.key === "string" ? { key: action.key } : {}), - ...(typeof action.waitFor === "string" ? 
{ waitFor: action.waitFor } : {}), - ...(typeof action.duration === "string" ? { duration: action.duration } : {}), + durationMs: Math.max(0, Date.now() - startedAtMs), + ...(typeof step.url === "string" ? { url: step.url } : {}), + ...(typeof step.selector === "string" ? { selector: step.selector } : {}), + ...(typeof step.text === "string" ? { text: step.text } : {}), + ...(typeof step.key === "string" ? { key: step.key } : {}), + ...(typeof step.waitFor === "string" ? { waitFor: step.waitFor } : {}), + ...(typeof step.duration === "string" ? { duration: step.duration } : {}), + ...(outcome.assertion ? { assertion: outcome.assertion } : {}), + ...(outcome.screenshot ? { screenshot: outcome.screenshot } : {}), finalUrl, - ...(error ? { error } : {}), + ...(outcome.error ? { error: outcome.error } : {}), } } +function browserAssertionsSummary(records: BrowserStepRecord[]): BrowserAssertionsSummary { + const results = records + .map((record) => record.assertion) + .filter((assertion): assertion is BrowserStepAssertion => assertion !== undefined) + const passed = results.filter((assertion) => assertion.passed).length + return { + total: results.length, + passed, + failed: results.length - passed, + results, + } +} + +/** Normalize a legacy actions-json action into the steps contract. */ +function normalizeLegacyBrowserAction(action: BrowserActionInput): BrowserInteractionStep { + const kind = action.type === "wait" ? 
"waitFor" : (action.type as BrowserInteractionStep["kind"]) + const step: BrowserInteractionStep = { kind } + if (typeof action.url === "string") step.url = action.url + if (typeof action.selector === "string") step.selector = action.selector + if (typeof action.text === "string") step.text = action.text + if (typeof action.value === "string") step.value = action.value + if (typeof action.key === "string") step.key = action.key + if (typeof action.waitFor === "string") step.waitFor = action.waitFor + if (typeof action.duration === "string") step.duration = action.duration + return step +} + +type BrowserActionInput = Record & { type: string } + +function browserActionsFromArgs(args: string[]): BrowserActionInput[] { + return jsonArrayArg(args, "actions-json").map((action, index) => { + if (!action || typeof action !== "object" || Array.isArray(action)) { + throw new Error(`wordpress.browser-actions actions-json[${index}] must be an object`) + } + const typedAction = action as BrowserActionInput + if (typeof typedAction.type !== "string" || typedAction.type.length === 0) { + throw new Error(`wordpress.browser-actions actions-json[${index}].type is required`) + } + return typedAction + }) +} + +function browserActionLoadState(waitFor: unknown): "domcontentloaded" | "load" | "networkidle" { + if (waitFor === undefined || waitFor === null || waitFor === "") { + return "domcontentloaded" + } + if (waitFor === "domcontentloaded" || waitFor === "load" || waitFor === "networkidle") { + return waitFor + } + throw new Error(`wordpress.browser-actions navigate waitFor supports domcontentloaded, load, networkidle: ${waitFor}`) +} + function resolveBrowserProbeUrl(pathOrUrl: string, baseUrl: string): string { try { return new URL(pathOrUrl).toString() diff --git a/scripts/browser-actions-artifact-smoke.ts b/scripts/browser-actions-artifact-smoke.ts index c12eefe..c0037e0 100644 --- a/scripts/browser-actions-artifact-smoke.ts +++ b/scripts/browser-actions-artifact-smoke.ts @@ 
-22,6 +22,11 @@ add_action('wp_footer', function () { }); `) +// Exercise the full interaction-script contract: an ordered multi-step script that +// drives the UI (fill + click), waits, asserts browser behavior (expect + evaluate +// with a deep-equal assert), and captures a named screenshot. evaluate is the +// policy-gated escape hatch — the recipe auto-grants wordpress.browser-actions.evaluate +// because the script includes an evaluate step. await writeFile(recipePath, `${JSON.stringify({ schema: "wp-codebox/workspace-recipe/v1", inputs: { @@ -38,13 +43,17 @@ await writeFile(recipePath, `${JSON.stringify({ { command: "wordpress.browser-actions", args: [ - `actions-json=${JSON.stringify([ - { type: "navigate", url: "/", waitFor: "load" }, - { type: "fill", selector: "#wp-codebox-name", value: "Runtime" }, - { type: "click", selector: "#wp-codebox-button" }, - { type: "wait", selector: '#wp-codebox-result[data-state="done"]' }, + "url=/", + `steps-json=${JSON.stringify([ + { kind: "waitFor", selector: "#wp-codebox-button" }, + { kind: "fill", selector: "#wp-codebox-name", value: "Runtime" }, + { kind: "click", selector: "#wp-codebox-button" }, + { kind: "waitFor", selector: '#wp-codebox-result[data-state="done"]' }, + { kind: "expect", selector: "#wp-codebox-result", state: "visible" }, + { kind: "evaluate", expression: "document.getElementById('wp-codebox-result').dataset.state", assert: "done" }, + { kind: "screenshot", name: "after-apply" }, ])}`, - "capture=actions,console,errors,html,network,screenshot", + "capture=steps,console,errors,html,network,screenshot", ], }, ], @@ -66,52 +75,69 @@ assert.equal(output.success, true, output.error?.message ?? 
"recipe-run failed") assert.ok(output.artifacts?.directory, "recipe-run should return an artifact directory") const artifactDirectory = output.artifacts.directory -const actionsPath = join(artifactDirectory, "files", "browser", "actions.jsonl") +const stepsPath = join(artifactDirectory, "files", "browser", "steps.jsonl") const consolePath = join(artifactDirectory, "files", "browser", "console.jsonl") const htmlPath = join(artifactDirectory, "files", "browser", "snapshot.html") const summaryPath = join(artifactDirectory, "files", "browser", "action-summary.json") +const namedScreenshotPath = join(artifactDirectory, "files", "browser", "screenshot-after-apply.png") const manifestPath = join(artifactDirectory, "manifest.json") const reviewPath = join(artifactDirectory, "files", "review.json") -assert.equal(existsSync(actionsPath), true, "actions.jsonl should be captured") +assert.equal(existsSync(stepsPath), true, "steps.jsonl should be captured") assert.equal(existsSync(consolePath), true, "console.jsonl should be captured") assert.equal(existsSync(htmlPath), true, "snapshot.html should be captured") assert.equal(existsSync(summaryPath), true, "action-summary.json should be captured") +assert.equal(existsSync(namedScreenshotPath), true, "named screenshot should be captured") -const actionLog = await readFile(actionsPath, "utf8") +const stepsLog = await readFile(stepsPath, "utf8") const consoleLog = await readFile(consolePath, "utf8") const htmlSnapshot = await readFile(htmlPath, "utf8") -assert.match(actionLog, /"type":"navigate"/) -assert.match(actionLog, /"type":"fill"/) -assert.match(actionLog, /"type":"click"/) -assert.match(actionLog, /"type":"wait"/) +assert.match(stepsLog, /"kind":"fill"/) +assert.match(stepsLog, /"kind":"click"/) +assert.match(stepsLog, /"kind":"expect"/) +assert.match(stepsLog, /"kind":"evaluate"/) +assert.match(stepsLog, /"kind":"screenshot"/) +assert.match(stepsLog, /"durationMs":/) assert.match(consoleLog, /wp-codebox browser action 
completed/) assert.match(htmlSnapshot, /Hello Runtime/) const summary = JSON.parse(await readFile(summaryPath, "utf8")) as { schema: string finalUrl: string - files: { actions?: string; html?: string; screenshot?: string; summary: string } - summary: { actions: number; replayability: string; htmlSnapshot: boolean } + files: { steps?: string; html?: string; screenshot?: string; summary: string } + assertions?: { total: number; passed: number; failed: number; results: Array<{ kind: string; passed: boolean }> } + summary: { steps: number; actions: number; replayability: string; htmlSnapshot: boolean; assertions?: { total: number; passed: number; failed: number } } } assert.equal(summary.schema, "wp-codebox/browser-actions/v1") assert.equal(summary.finalUrl.endsWith("/"), true, "summary should include final URL") -assert.equal(summary.files.actions, "files/browser/actions.jsonl") +assert.equal(summary.files.steps, "files/browser/steps.jsonl") assert.equal(summary.files.html, "files/browser/snapshot.html") assert.equal(summary.files.summary, "files/browser/action-summary.json") -assert.equal(summary.summary.actions, 4) +assert.equal(summary.summary.steps, 8) assert.equal(summary.summary.replayability, "artifact-backed") assert.equal(summary.summary.htmlSnapshot, true) +// Machine-readable assertions: an expect + an evaluate(assert), both passing. 
+assert.ok(summary.assertions, "summary should include an assertions block") +assert.equal(summary.assertions.total, 2) +assert.equal(summary.assertions.passed, 2) +assert.equal(summary.assertions.failed, 0) +assert.ok(summary.assertions.results.some((result) => result.kind === "expect" && result.passed)) +assert.ok(summary.assertions.results.some((result) => result.kind === "evaluate" && result.passed)) +assert.equal(summary.summary.assertions?.total, 2) +assert.equal(summary.summary.assertions?.passed, 2) + const manifest = JSON.parse(await readFile(manifestPath, "utf8")) as { files: Array<{ path: string; kind: string }> } -assert.ok(manifest.files.some((file) => file.path === "files/browser/actions.jsonl" && file.kind === "browser-actions")) +assert.ok(manifest.files.some((file) => file.path === "files/browser/steps.jsonl" && file.kind === "browser-steps")) assert.ok(manifest.files.some((file) => file.path === "files/browser/action-summary.json" && file.kind === "browser-summary")) -const review = JSON.parse(await readFile(reviewPath, "utf8")) as { browser?: { probes?: Array<{ actions?: string; actionCount?: number; html?: string; summaryFile?: string }> } } -assert.equal(review.browser?.probes?.[0]?.actions, "files/browser/actions.jsonl") -assert.equal(review.browser?.probes?.[0]?.actionCount, 4) +const review = JSON.parse(await readFile(reviewPath, "utf8")) as { browser?: { probes?: Array<{ steps?: string; stepCount?: number; html?: string; summaryFile?: string; assertions?: { total: number; passed: number; failed: number } }> } } +assert.equal(review.browser?.probes?.[0]?.steps, "files/browser/steps.jsonl") +assert.equal(review.browser?.probes?.[0]?.stepCount, 8) assert.equal(review.browser?.probes?.[0]?.html, "files/browser/snapshot.html") assert.equal(review.browser?.probes?.[0]?.summaryFile, "files/browser/action-summary.json") +assert.equal(review.browser?.probes?.[0]?.assertions?.total, 2) +assert.equal(review.browser?.probes?.[0]?.assertions?.passed, 2) 
console.log(`Browser actions artifact smoke passed: ${artifactDirectory}`) diff --git a/scripts/browser-interaction-script-validation-smoke.ts b/scripts/browser-interaction-script-validation-smoke.ts new file mode 100644 index 0000000..2a5e1dc --- /dev/null +++ b/scripts/browser-interaction-script-validation-smoke.ts @@ -0,0 +1,77 @@ +import assert from "node:assert/strict" +import { + BROWSER_INTERACTION_STEP_KINDS, + browserInteractionScriptUsesEvaluate, + validateBrowserInteractionScript, +} from "@chubes4/wp-codebox-core" + +// Backend-agnostic step-schema validation unit smoke (issue #310). Exercises the +// contract declared in runtime-core without booting a browser, so the parsing/ +// validation logic is covered even when Playwright is unavailable. + +// Every documented step kind is recognized. +const everyKind = BROWSER_INTERACTION_STEP_KINDS.map((kind) => { + switch (kind) { + case "navigate": + return { kind, url: "/" } + case "click": + case "hover": + return { kind, selector: ".thing" } + case "fill": + case "type": + return { kind, selector: "#field", value: "x" } + case "press": + return { kind, key: "Enter" } + case "drag": + return { kind, from: ".source", to: { x: 10, y: 20 } } + case "select": + return { kind, selector: "#sel", value: "a" } + case "waitFor": + return { kind, selector: ".ready" } + case "evaluate": + return { kind, expression: "1 + 1", assert: 2 } + case "expect": + return { kind, selector: ".ok", state: "visible" } + case "screenshot": + return { kind, name: "shot" } + case "capture": + return { kind } + } +}) + +const allValid = validateBrowserInteractionScript(everyKind) +assert.equal(allValid.valid, true, `expected all kinds valid, got: ${JSON.stringify(allValid.issues)}`) +assert.equal(allValid.steps.length, everyKind.length) + +// evaluate detection drives the separate policy gate. 
+assert.equal(browserInteractionScriptUsesEvaluate(allValid.steps), true) +assert.equal(browserInteractionScriptUsesEvaluate(validateBrowserInteractionScript([{ kind: "click", selector: ".x" }]).steps), false) + +// Non-array input is rejected. +assert.equal(validateBrowserInteractionScript({ kind: "click" }).valid, false) + +// Unknown kind is rejected with a per-index issue. +const unknownKind = validateBrowserInteractionScript([{ kind: "teleport" }]) +assert.equal(unknownKind.valid, false) +assert.equal(unknownKind.issues[0]?.index, 0) + +// Missing required fields are flagged per kind. +assert.equal(validateBrowserInteractionScript([{ kind: "navigate" }]).valid, false) +assert.equal(validateBrowserInteractionScript([{ kind: "fill", selector: "#a" }]).valid, false) // missing value +assert.equal(validateBrowserInteractionScript([{ kind: "press" }]).valid, false) // missing key +assert.equal(validateBrowserInteractionScript([{ kind: "drag", from: ".a" }]).valid, false) // missing to +assert.equal(validateBrowserInteractionScript([{ kind: "evaluate" }]).valid, false) // missing expression +assert.equal(validateBrowserInteractionScript([{ kind: "expect" }]).valid, false) // missing selector + +// drag accepts both selector and coordinate drop targets. +assert.equal(validateBrowserInteractionScript([{ kind: "drag", from: ".a", to: { selector: ".b" } }]).valid, true) +assert.equal(validateBrowserInteractionScript([{ kind: "drag", from: ".a", to: { x: 1, y: 2 } }]).valid, true) + +// expect rejects an unknown state. +assert.equal(validateBrowserInteractionScript([{ kind: "expect", selector: ".a", state: "glowing" }]).valid, false) + +// click accepts either selector or text. 
+assert.equal(validateBrowserInteractionScript([{ kind: "click", text: "Save" }]).valid, true) +assert.equal(validateBrowserInteractionScript([{ kind: "click" }]).valid, false) + +console.log("Browser interaction script validation smoke passed") diff --git a/scripts/discovery-command-smoke.ts b/scripts/discovery-command-smoke.ts index c509b1e..61d6b8d 100644 --- a/scripts/discovery-command-smoke.ts +++ b/scripts/discovery-command-smoke.ts @@ -28,6 +28,7 @@ interface RecipeSchemaOutput { jsonSchema: Record } +// Recipe-eligible commands (recipe: true) feed the recipe JSON schema factory. const expectedCommandIds = [ "inspect-mounted-inputs", "wordpress.run-php", @@ -44,6 +45,25 @@ const expectedCommandIds = [ "wp-codebox.agent-sandbox-run", ] +// Full command catalog (every registered command, including policy-only capabilities +// such as wordpress.browser-actions.evaluate which is not a recipe step). +const expectedCatalogCommandIds = [ + "inspect-mounted-inputs", + "wordpress.run-php", + "wordpress.wp-cli", + "wordpress.ability", + "wordpress.bench", + "wordpress.phpunit", + "wordpress.plugin-check", + "wordpress.core-phpunit", + "wordpress.theme-check", + "wordpress.browser-probe", + "wordpress.browser-actions", + "wordpress.browser-actions.evaluate", + "wp-codebox.agent-runtime-probe", + "wp-codebox.agent-sandbox-run", +] + const representativeRecipes = [ { schema: "wp-codebox/workspace-recipe/v1", @@ -92,7 +112,7 @@ function assert(condition: unknown, message: string): asserts condition { async function main(): Promise { const catalog = await cliJson(["commands", "--json"]) assert(catalog.schema === "wp-codebox/command-catalog/v1", "Unexpected command catalog schema") - assert(JSON.stringify(catalog.commands.map((command) => command.id)) === JSON.stringify(expectedCommandIds), "Command ids changed unexpectedly") + assert(JSON.stringify(catalog.commands.map((command) => command.id)) === JSON.stringify(expectedCatalogCommandIds), "Command ids changed 
unexpectedly") for (const command of catalog.commands) { assert(command.description.length > 0, `${command.id} is missing description`)