Automattic · chubes4 · May 30, 2026 · May 30, 2026
diff --git a/README.md b/README.md
@@ -468,7 +468,7 @@ Supported runtime commands today:
 
 `wordpress.wp-cli` automatically enables Playground's `wp-cli` extra library when the command is allowed by runtime policy.
 
-`wordpress.browser-probe` accepts `wait-for=domcontentloaded|load|networkidle|selector:<selector>|duration`, `duration=<n>s`, and `capture=console,errors,html,network,performance,memory,screenshot`. It records machine-readable evidence refs such as `files/browser/console.jsonl`, `files/browser/errors.jsonl`, `files/browser/network.jsonl`, `files/browser/performance.json`, `files/browser/memory.json`, `files/browser/checkpoints.jsonl`, `files/browser/snapshot.html`, `files/browser/screenshot.png`, and `files/browser/summary.json` when those captures are enabled. The summary includes requested/final URLs, viewport/device metadata, HTML and screenshot hashes, network event counts, optional final/peak browser memory and performance summaries, and a generic `artifact-backed|partial|diagnostic-only` replayability classification. Performance and memory captures use generic browser/CDP data only: JS heap when available, CDP `Performance.getMetrics`, CDP DOM counters, DOM/resource counts and byte totals, and long task counts/duration. WP Codebox intentionally keeps these browser evidence fields generic; consumers such as eval harnesses may interpret them without WP Codebox adding scoring, grading, or benchmark semantics.
+`wordpress.browser-probe` accepts `wait-for=domcontentloaded|load|networkidle|selector:<selector>|duration`, `duration=<n>s`, and `capture=console,errors,html,network,performance,memory,screenshot`. It records machine-readable evidence refs such as `files/browser/console.jsonl`, `files/browser/errors.jsonl`, `files/browser/network.jsonl`, `files/browser/performance.json`, `files/browser/memory.json`, `files/browser/checkpoints.jsonl`, `files/browser/snapshot.html`, `files/browser/screenshot.png`, and `files/browser/summary.json` when those captures are enabled. The summary includes requested/final URLs, viewport/device metadata, HTML and screenshot hashes, network event counts, optional final/peak browser memory and performance summaries, and a generic `artifact-backed|partial|diagnostic-only` replayability classification. Performance and memory captures use generic browser/CDP data only: JS heap when available, CDP `Performance.getMetrics`, CDP DOM counters, DOM/resource counts and byte totals, and long task counts/duration. When `performance` or `memory` capture is enabled, probe scripts may call `window.__wpCodeboxProbeCheckpoint(name, metadata)` to record named, generic checkpoint snapshots. WP Codebox intentionally keeps these browser evidence fields generic; consumers such as eval harnesses may interpret them without WP Codebox adding scoring, grading, or benchmark semantics.
 
 `wordpress.browser-actions` accepts `actions-json=<array>` with ordered `navigate`, `click`, `fill`, `press`, `wait`, and `capture` actions. `navigate` uses `url` plus optional `waitFor=domcontentloaded|load|networkidle`; `click` uses `selector` or `text`; `fill` uses `selector` and `value`; `press` uses `key` plus optional `selector`; `wait` uses `selector` or `waitFor=domcontentloaded|load|networkidle|duration` with `duration=<n>s|<n>ms`. It records `files/browser/actions.jsonl`, `files/browser/action-summary.json`, and optional `console`, `errors`, `network`, `html`, and `screenshot` captures. Failures identify the failed action index/type in the action log, include serialized browser errors, and still write the requested audit artifacts when possible.
 

diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
@@ -3626,6 +3626,16 @@ async function validateRecipeStepArgs(step: WorkspaceRecipe["workflow"]["steps"]
       addIssue("invalid-duration", `${path}.args`, "wordpress.browser-probe duration must look like 500ms or 2s.")
     }
 
+    const repeat = recipeStepArgValue(step.args ?? [], "repeat")
+    if (repeat && !/^[1-9]\d*$/.test(repeat)) {
+      addIssue("invalid-repeat", `${path}.args`, "wordpress.browser-probe repeat must be a positive integer.")
+    }
+
+    const resetBetween = recipeStepArgValue(step.args ?? [], "reset-between")
+    if (resetBetween && !["none", "reload", "new-page"].includes(resetBetween)) {
+      addIssue("invalid-reset-between", `${path}.args`, "wordpress.browser-probe reset-between must be none, reload, or new-page.")
+    }
+
     const capture = recipeStepArgValue(step.args ?? [], "capture")
     if (capture) {
       for (const item of capture.split(",").map((value) => value.trim()).filter(Boolean)) {

diff --git a/packages/runtime-core/src/index.ts b/packages/runtime-core/src/index.ts
@@ -170,6 +170,8 @@ export const commandRegistry = [
       { name: "duration", description: "Extra capture duration, or wait time when wait-for=duration.", format: "duration, e.g. 2s or 500ms" },
       { name: "script", description: "Optional page-side JavaScript to evaluate after navigation and before final capture.", format: "JavaScript function body" },
       { name: "capture", description: "Comma-separated artifacts to capture.", format: "console,errors,html,network,performance,memory,screenshot" },
+      { name: "repeat", description: "Optional repeated probe iterations for leak-oriented recipes.", format: "positive integer" },
+      { name: "reset-between", description: "Requested reset mode between repeated probe iterations.", format: "none|reload|new-page" },
     ],
     outputShape: "JSON summary plus files/browser/console.jsonl, errors.jsonl, network.jsonl, performance.json, memory.json, checkpoints.jsonl, snapshot.html, summary.json, and screenshot.png when captured.",
     policyRequirement: "Runtime policy commands must include wordpress.browser-probe.",

diff --git a/packages/runtime-playground/src/index.ts b/packages/runtime-playground/src/index.ts
@@ -46,7 +46,15 @@ import type { ConsoleMessage, Page, Request, Response } from "playwright"
 const BROWSER_PROBE_CAPTURE_VALUES = ["console", "errors", "html", "network", "performance", "memory", "screenshot"] as const
 const BROWSER_PROBE_PERFORMANCE_INIT_SCRIPT = `
 (() => {
-  const state = globalThis.__wpCodeboxBrowserProbe = globalThis.__wpCodeboxBrowserProbe || { longTasks: [] };
+  const state = globalThis.__wpCodeboxBrowserProbe = globalThis.__wpCodeboxBrowserProbe || { checkpoints: [], longTasks: [] };
+  state.checkpoints = state.checkpoints || [];
+  globalThis.__wpCodeboxProbeCheckpoint = (name, metadata = {}) => {
+    state.checkpoints.push({
+      name: String(name || ''),
+      metadata,
+      timestamp: new Date().toISOString(),
+    });
+  };
   if (state.longTaskObserverInstalled || typeof PerformanceObserver === 'undefined') {
     return;
   }
@@ -605,6 +613,7 @@ interface BrowserProbePerformanceSummary {
 interface BrowserProbeCheckpointRecord {
   schema: "wp-codebox/browser-checkpoint/v1"
   name: string
+  metadata?: unknown
   timestamp: string
   metrics: BrowserProbeMetricsSnapshot
 }
@@ -1390,6 +1399,7 @@ class PlaygroundRuntime implements Runtime {
           return run()
         }, script)
         if (capturesBrowserMetrics) {
+          checkpoints.push(...await browserProbePendingCheckpoints(page))
           checkpoints.push(await browserProbeCheckpoint(page, "after-script"))
         }
       }
@@ -2934,11 +2944,51 @@ function browserProbeReplayability(capture: Set<string>): BrowserProbeReplayabil
   return "diagnostic-only"
 }
 
-async function browserProbeCheckpoint(page: Page, name: string): Promise<BrowserProbeCheckpointRecord> {
+/**
+ * Drains checkpoints queued in the page via `__wpCodeboxProbeCheckpoint` into checkpoint records.
+ *
+ * The page-side queue is emptied with `splice(0)`, so a queued checkpoint is returned only once.
+ * Name, metadata, and timestamp come from the page; metrics are sampled here at drain time,
+ * not at the moment the page queued the checkpoint.
+ *
+ * @param page - Page whose `__wpCodeboxBrowserProbe.checkpoints` queue is drained.
+ * @returns One record per queued checkpoint, in queue order; empty when nothing is queued.
+ */
+async function browserProbePendingCheckpoints(page: Page): Promise<BrowserProbeCheckpointRecord[]> {
+  const pending = await page.evaluate(() => {
+    const state = (globalThis as typeof globalThis & { __wpCodeboxBrowserProbe?: { checkpoints?: Array<{ name?: unknown; metadata?: unknown; timestamp?: unknown }> } }).__wpCodeboxBrowserProbe
+    const checkpoints = Array.isArray(state?.checkpoints) ? state.checkpoints.splice(0) : []
+    return checkpoints.map((checkpoint) => ({
+      // The page-side helper coerces a missing name to "", so treat empty names as unnamed too.
+      name: typeof checkpoint.name === "string" && checkpoint.name !== "" ? checkpoint.name : "checkpoint",
+      metadata: checkpoint.metadata,
+      timestamp: typeof checkpoint.timestamp === "string" ? checkpoint.timestamp : undefined,
+    }))
+  })
+
+  const records: BrowserProbeCheckpointRecord[] = []
+  for (const checkpoint of pending) {
+    // Each record awaits its own metrics snapshot from the page, one at a time.
+    records.push(await browserProbeCheckpoint(page, checkpoint.name, checkpoint.metadata, checkpoint.timestamp))
+  }
+  return records
+}
+
+/**
+ * Builds a checkpoint record whose metrics are sampled from `page` at call time.
+ *
+ * @param page - Page to sample metrics from.
+ * @param name - Checkpoint name stored on the record.
+ * @param metadata - Optional metadata; the field is omitted from the record when undefined.
+ * @param timestamp - Optional timestamp to record; falls back to `now()` when not provided.
+ * @returns The assembled `wp-codebox/browser-checkpoint/v1` record.
+ */
+async function browserProbeCheckpoint(page: Page, name: string, metadata?: unknown, timestamp?: string): Promise<BrowserProbeCheckpointRecord> {
   return {
     schema: "wp-codebox/browser-checkpoint/v1",
     name,
-    timestamp: now(),
+    ...(typeof metadata !== "undefined" ? { metadata } : {}),
+    timestamp: timestamp ?? now(),
     metrics: await browserProbeMetricsSnapshot(page),
   }
 }

diff --git a/scripts/browser-probe-artifact-smoke.ts b/scripts/browser-probe-artifact-smoke.ts
@@ -42,7 +42,7 @@ await writeFile(recipePath, `${JSON.stringify({
           "wait-for=load",
           "duration=1s",
           "capture=console,errors,html,network,performance,memory,screenshot",
-          "script=console.info('wp-codebox fixture browser script'); return { title: document.title, hasBody: !!document.body };",
+          "script=window.__wpCodeboxProbeCheckpoint('fixture-before-return', { source: 'smoke' }); console.info('wp-codebox fixture browser script'); return { title: document.title, hasBody: !!document.body };",
         ],
       },
     ],
@@ -97,6 +97,7 @@ assert.match(errorLog, /wp-codebox fixture browser error/)
 assert.match(htmlSnapshot, /Browser Error Fixture|wp-codebox fixture console error/)
 assert.match(networkLog, /"type":"response"/)
 assert.match(checkpointsLog, /"schema":"wp-codebox\/browser-checkpoint\/v1"/)
+assert.match(checkpointsLog, /"name":"fixture-before-return"/)
 
 const memory = JSON.parse(await readFile(memoryPath, "utf8")) as { schema: string; final: { domCounters: { nodes: number | null } }; peak: { domNodes: { final: number | null; peak: number | null } }; checkpoints: unknown[] }
 const performance = JSON.parse(await readFile(performancePath, "utf8")) as { schema: string; final: { resources: { count: number }; dom: { nodes: number } }; peak: { resources: number; domNodes: { final: number | null; peak: number | null } }; checkpoints: unknown[] }