Automattic · chubes4 · May 30, 2026 · May 30, 2026
diff --git a/README.md b/README.md
@@ -635,7 +635,7 @@ npm run wp-codebox -- recipe-run \
   --json
 ```
 
-Each workload file returns a callable. The callable may return numeric metrics directly or a payload with `metrics` and `metadata` keys. The recipe output reports duration percentiles, custom metric aggregates, peak memory, runtime artifacts, and the parsed `benchResults` object in JSON output when a single `wordpress.bench` step runs.
+Each workload file returns a callable. The callable may return numeric metrics directly or a payload with `metrics` and `metadata` keys. The recipe output reports duration percentiles, custom metric aggregates, peak memory, runtime artifacts, and the parsed `benchResults` object in JSON output when a single `wordpress.bench` step runs. If earlier `wordpress.browser-probe` steps in the same recipe captured generic `performance` or `memory` artifacts, `wordpress.bench` promotes selected numeric browser values into each scenario's metrics using `browser_*` names, while the raw browser artifacts remain available under `files/browser/`.
 
 ### `agent-runtime-probe`
 

diff --git a/package.json b/package.json
@@ -50,6 +50,7 @@
     "phpunit-diagnostic-artifact-smoke": "tsx scripts/phpunit-diagnostic-artifact-smoke.ts",
     "plugin-check-normalization-smoke": "tsx scripts/plugin-check-normalization-smoke.ts",
     "recipe-bench-smoke": "tsx scripts/recipe-bench-smoke.ts",
+    "recipe-browser-bench-metrics-smoke": "tsx scripts/recipe-browser-bench-metrics-smoke.ts",
     "recipe-browser-smoke": "tsx scripts/recipe-browser-smoke.ts",
     "browser-probe-artifact-smoke": "tsx scripts/browser-probe-artifact-smoke.ts",
     "browser-actions-artifact-smoke": "tsx scripts/browser-actions-artifact-smoke.ts",
@@ -73,7 +74,7 @@
     "browser-runtime-operation-smoke": "tsx scripts/browser-runtime-operation-smoke.ts",
     "wp-codebox": "node packages/cli/dist/index.js",
     "wordpress-plugin-smoke": "php tests/smoke-wordpress-plugin.php",
-    "check": "npm run build && npm run command-registry-smoke && npm run sandbox-tool-policy-smoke && npm run task-input-contract-smoke && npm run discovery-command-smoke && npm run theme-check-normalization-smoke && npm run agent-sandbox-code-smoke && npm run policy-validation-smoke && npm run workspace-policy-smoke && npm run wordpress-plugin-smoke && npm run browser-runtime-operation-smoke && npm run package-distribution-smoke && npm run artifact-bundle-verifier-smoke && npm run artifact-patch-git-apply-smoke && npm run artifact-contract-smoke && npm run external-adapter-contract-smoke && npm run runtime-episode-smoke && npm run runtime-snapshot-restore-smoke && npm run runtime-action-adapter-smoke && npm run core-phpunit-command-smoke && npm run plugin-check-normalization-smoke && npm run recipe-bench-smoke && npm run recipe-dry-run-smoke && npm run recipe-workflow-phases-smoke && npm run recipe-site-seed-smoke && npm run recipe-staged-files-smoke && npm run recipe-workspace-seed-excludes-smoke && npm run recipe-runtime-evidence-smoke && npm run recipe-interruption-artifacts-smoke && npm run recipe-heavyweight-plugin-runtime-smoke && npm run preview-options-contract-smoke && npm run preview-port-smoke && npm run preview-public-url-canonical-smoke && npm run preview-response-body-smoke && npm run boot-preview-smoke && npm run blueprint-validation-smoke && npm run browser-probe-artifact-smoke && npm run browser-actions-artifact-smoke && npm run wp-codebox -- run --mount ./examples/simple-plugin:/wordpress/wp-content/plugins/simple-plugin --command wordpress.run-php --arg code-file=./examples/simple-plugin/probe.php --artifacts ./artifacts --json"
+    "check": "npm run build && npm run command-registry-smoke && npm run sandbox-tool-policy-smoke && npm run task-input-contract-smoke && npm run discovery-command-smoke && npm run theme-check-normalization-smoke && npm run agent-sandbox-code-smoke && npm run policy-validation-smoke && npm run workspace-policy-smoke && npm run wordpress-plugin-smoke && npm run browser-runtime-operation-smoke && npm run package-distribution-smoke && npm run artifact-bundle-verifier-smoke && npm run artifact-patch-git-apply-smoke && npm run artifact-contract-smoke && npm run external-adapter-contract-smoke && npm run runtime-episode-smoke && npm run runtime-snapshot-restore-smoke && npm run runtime-action-adapter-smoke && npm run core-phpunit-command-smoke && npm run plugin-check-normalization-smoke && npm run recipe-bench-smoke && npm run recipe-browser-bench-metrics-smoke && npm run recipe-dry-run-smoke && npm run recipe-workflow-phases-smoke && npm run recipe-site-seed-smoke && npm run recipe-staged-files-smoke && npm run recipe-workspace-seed-excludes-smoke && npm run recipe-runtime-evidence-smoke && npm run recipe-interruption-artifacts-smoke && npm run recipe-heavyweight-plugin-runtime-smoke && npm run preview-options-contract-smoke && npm run preview-port-smoke && npm run preview-public-url-canonical-smoke && npm run preview-response-body-smoke && npm run boot-preview-smoke && npm run blueprint-validation-smoke && npm run browser-probe-artifact-smoke && npm run browser-actions-artifact-smoke && npm run wp-codebox -- run --mount ./examples/simple-plugin:/wordpress/wp-content/plugins/simple-plugin --command wordpress.run-php --arg code-file=./examples/simple-plugin/probe.php --artifacts ./artifacts --json"
   },
   "workspaces": [
     "packages/*"

diff --git a/packages/runtime-playground/src/index.ts b/packages/runtime-playground/src/index.ts
@@ -567,6 +567,7 @@ interface BrowserProbeArtifact {
     finalUrl: string
     htmlSnapshot: boolean
     memory?: BrowserProbeMemorySummary
+    metrics?: Record<string, number>
     networkEvents: number
     performance?: BrowserProbePerformanceSummary
     replayability: BrowserProbeReplayability
@@ -1463,6 +1464,7 @@ class PlaygroundRuntime implements Runtime {
           finalUrl,
           htmlSnapshot: capture.has("html"),
           ...(memoryArtifact ? { memory: memoryArtifact.peak } : {}),
+          ...(memoryArtifact || performanceArtifact ? { metrics: browserProbeBenchMetrics(memoryArtifact, performanceArtifact) } : {}),
           networkEvents: network.length,
           ...(performanceArtifact ? { performance: performanceArtifact.peak } : {}),
           replayability: browserProbeReplayability(capture),
@@ -1900,7 +1902,7 @@ class PlaygroundRuntime implements Runtime {
     })
     assertPlaygroundResponseOk("wordpress.bench", response)
 
-    return response.text
+    return promoteBrowserMetricsToBenchResults(response.text, this.browserProbes)
   }
 
   private async runPhpunit(spec: ExecutionSpec): Promise<string> {
@@ -3160,6 +3162,68 @@ function lastNumber(values: Array<number | null | undefined>): number | null {
   return null
 }
 
+/**
+ * Flattens one probe's memory and performance captures into the `browser_*`
+ * numeric metrics stored on the probe summary.
+ *
+ * All nine keys are always present; any value that cannot be read from the
+ * supplied artifacts falls back to 0. Checkpoint and DOM node counts prefer
+ * the performance capture and fall back to the memory capture.
+ *
+ * @param memoryArtifact - Memory capture for the probe, if one was taken.
+ * @param performanceArtifact - Performance capture for the probe, if one was taken.
+ * @returns Metric name mapped to its numeric value.
+ */
+function browserProbeBenchMetrics(memoryArtifact?: BrowserProbeMemoryArtifact, performanceArtifact?: BrowserProbePerformanceArtifact): Record<string, number> {
+  const memoryPeak = memoryArtifact?.peak
+  const finalPerformance = performanceArtifact?.final
+
+  // Keys are assigned in the order they are serialized.
+  const metrics: Record<string, number> = {}
+  metrics.browser_peak_used_js_heap_bytes = memoryPeak?.usedJSHeapSize.peak ?? 0
+  metrics.browser_final_used_js_heap_bytes = memoryPeak?.usedJSHeapSize.final ?? 0
+  metrics.browser_checkpoint_count = performanceArtifact?.checkpoints.length ?? memoryArtifact?.checkpoints.length ?? 0
+  metrics.browser_dom_node_count = finalPerformance?.dom.nodes ?? memoryPeak?.domNodes.final ?? 0
+  metrics.browser_iframe_count = finalPerformance?.dom.iframes ?? 0
+  metrics.browser_resource_count = finalPerformance?.resources.count ?? 0
+  metrics.browser_transfer_size_bytes = finalPerformance?.resources.transferSizeBytes ?? 0
+  metrics.browser_long_task_count = finalPerformance?.longTasks.count ?? 0
+  metrics.browser_long_task_total_ms = finalPerformance?.longTasks.totalDurationMs ?? 0
+  return metrics
+}
+
+/**
+ * Merges the combined browser probe metrics into every scenario of a
+ * `wordpress.bench` result document.
+ *
+ * The `browser_*` values are spread after a scenario's own metrics, so they
+ * win on a name clash. Promotion is an enrichment step: when no probe carries
+ * metrics, or when `raw` is not a JSON object, `raw` is returned untouched
+ * instead of throwing from here.
+ *
+ * @param raw - Response text of the bench run, expected to be a JSON document.
+ * @param probes - Browser probes whose summaries may carry `metrics`.
+ * @returns `raw` unchanged, or the augmented document pretty-printed with a
+ *   two-space indent and a trailing newline.
+ */
+function promoteBrowserMetricsToBenchResults(raw: string, probes: BrowserProbeArtifact[]): string {
+  const metrics = combinedBrowserBenchMetrics(probes)
+  if (!metrics) {
+    return raw
+  }
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(raw)
+  } catch {
+    // Malformed bench output is passed through as-is; a SyntaxError raised
+    // here would only appear when a probe happened to run earlier.
+    return raw
+  }
+
+  // JSON `null`, primitives and other non-object documents have no scenarios to enrich.
+  if (!isRecord(parsed)) {
+    return raw
+  }
+
+  const scenarios = Array.isArray(parsed.scenarios) ? parsed.scenarios : []
+  for (const scenario of scenarios) {
+    if (!isRecord(scenario)) {
+      continue
+    }
+
+    scenario.metrics = {
+      ...(isRecord(scenario.metrics) ? scenario.metrics : {}),
+      ...metrics,
+    }
+  }
+
+  return `${JSON.stringify(parsed, null, 2)}\n`
+}
+
+/**
+ * Folds the `browser_*` metrics of all probes into one set.
+ *
+ * Peak heap is the maximum across probes; checkpoint and long-task values are
+ * summed across probes; every other value is taken from the last probe that
+ * carries metrics. Missing values count as 0.
+ *
+ * @param probes - Browser probes whose summaries may carry `metrics`.
+ * @returns The combined metrics, or `undefined` when no probe carries any.
+ */
+function combinedBrowserBenchMetrics(probes: BrowserProbeArtifact[]): Record<string, number> | undefined {
+  const perProbe = probes.map((probe) => probe.summary.metrics)
+  const metricSets = perProbe.filter((entry): entry is Record<string, number> => isRecord(entry))
+  if (metricSets.length === 0) {
+    return undefined
+  }
+
+  const lastSet: Record<string, number> = metricSets[metricSets.length - 1] ?? {}
+
+  let peakHeapBytes = Number.NEGATIVE_INFINITY
+  for (const set of metricSets) {
+    peakHeapBytes = Math.max(peakHeapBytes, set.browser_peak_used_js_heap_bytes ?? 0)
+  }
+
+  return {
+    browser_peak_used_js_heap_bytes: peakHeapBytes,
+    browser_final_used_js_heap_bytes: lastSet.browser_final_used_js_heap_bytes ?? 0,
+    browser_checkpoint_count: sumMetric(metricSets, "browser_checkpoint_count"),
+    browser_dom_node_count: lastSet.browser_dom_node_count ?? 0,
+    browser_iframe_count: lastSet.browser_iframe_count ?? 0,
+    browser_resource_count: lastSet.browser_resource_count ?? 0,
+    browser_transfer_size_bytes: lastSet.browser_transfer_size_bytes ?? 0,
+    browser_long_task_count: sumMetric(metricSets, "browser_long_task_count"),
+    browser_long_task_total_ms: sumMetric(metricSets, "browser_long_task_total_ms"),
+  }
+}
+
+/**
+ * Adds up one named metric across several metric sets.
+ *
+ * @param metricSets - Metric sets to read from.
+ * @param name - Metric key to total; a set without the key contributes 0.
+ * @returns The total, or 0 for an empty list.
+ */
+function sumMetric(metricSets: Array<Record<string, number>>, name: string): number {
+  let total = 0
+  for (const metrics of metricSets) {
+    total += metrics[name] ?? 0
+  }
+  return total
+}
+
 function serializeBrowserResponse(response: Response): BrowserProbeNetworkRecord {
   const request = response.request()
   return {

diff --git a/scripts/recipe-browser-bench-metrics-smoke.ts b/scripts/recipe-browser-bench-metrics-smoke.ts
@@ -0,0 +1,106 @@
+import assert from "node:assert/strict"
+import { spawn } from "node:child_process"
+import { existsSync } from "node:fs"
+import { mkdir, readFile, rm, writeFile } from "node:fs/promises"
+import { join, resolve } from "node:path"
+
+// Smoke test: a recipe that runs `wordpress.browser-probe` before
+// `wordpress.bench` must expose browser_* metrics on the bench scenario while
+// the raw browser artifacts stay on disk.
+
+// Everything this script writes lives under artifacts/recipe-browser-bench-metrics-smoke.
+const repoRoot = resolve(import.meta.dirname, "..")
+const workspace = resolve(repoRoot, "artifacts", "recipe-browser-bench-metrics-smoke")
+const recipePath = join(workspace, "recipe.json")
+const artifactsRoot = join(workspace, "artifacts")
+
+// Recreate the workspace from scratch on every run.
+await rm(workspace, { recursive: true, force: true })
+await mkdir(workspace, { recursive: true })
+
+// Recipe: one browser probe capturing performance and memory (its script
+// appends an iframe to the page), then a single-iteration bench of the
+// example bench plugin.
+await writeFile(recipePath, `${JSON.stringify({
+  schema: "wp-codebox/workspace-recipe/v1",
+  inputs: {
+    extraPlugins: [
+      {
+        source: resolve(repoRoot, "examples", "bench-plugin"),
+        slug: "bench-plugin",
+      },
+    ],
+  },
+  workflow: {
+    steps: [
+      {
+        command: "wordpress.browser-probe",
+        args: [
+          "url=/",
+          "wait-for=load",
+          "capture=performance,memory",
+          "script=document.body.appendChild(document.createElement('iframe'));",
+        ],
+      },
+      {
+        command: "wordpress.bench",
+        args: [
+          "component-id=bench-plugin",
+          "plugin-slug=bench-plugin",
+          "iterations=1",
+          "warmup=0",
+        ],
+      },
+    ],
+  },
+  artifacts: {
+    directory: artifactsRoot,
+  },
+}, null, 2)}\n`)
+
+// Run the built CLI against the recipe and parse its JSON output.
+const output = await runCli([
+  "packages/cli/dist/index.js",
+  "recipe-run",
+  "--recipe",
+  recipePath,
+  "--json",
+])
+
+assert.equal(output.success, true, output.error?.message ?? "recipe-run failed")
+assert.ok(output.benchResults, "recipe-run should expose benchResults")
+assert.equal(output.benchResults.scenarios.length, 1)
+
+// The single bench scenario must carry the promoted browser_* metrics.
+const metrics = output.benchResults.scenarios[0].metrics
+assert.ok(metrics.browser_checkpoint_count >= 3, "browser_checkpoint_count should include probe checkpoints")
+assert.ok(metrics.browser_dom_node_count > 0, "browser_dom_node_count should be numeric")
+// NOTE(review): expects the injected iframe to be the only one, i.e. the
+// front page renders no iframes of its own; confirm for the default site.
+assert.equal(metrics.browser_iframe_count, 1)
+assert.ok(metrics.browser_resource_count >= 0, "browser_resource_count should be numeric")
+assert.ok(metrics.browser_transfer_size_bytes >= 0, "browser_transfer_size_bytes should be numeric")
+assert.ok(metrics.browser_long_task_count >= 0, "browser_long_task_count should be numeric")
+assert.ok(metrics.browser_long_task_total_ms >= 0, "browser_long_task_total_ms should be numeric")
+assert.ok(metrics.browser_peak_used_js_heap_bytes >= 0, "browser_peak_used_js_heap_bytes should be numeric")
+assert.ok(metrics.browser_final_used_js_heap_bytes >= 0, "browser_final_used_js_heap_bytes should be numeric")
+
+// The raw browser artifacts must still exist under files/browser/ in the
+// artifact directory reported by the CLI.
+const artifactDirectory = output.artifacts.directory
+const performancePath = join(artifactDirectory, "files", "browser", "performance.json")
+const checkpointsPath = join(artifactDirectory, "files", "browser", "checkpoints.jsonl")
+assert.equal(existsSync(performancePath), true, "performance.json should remain available")
+assert.equal(existsSync(checkpointsPath), true, "checkpoints.jsonl should remain available")
+
+// Spot-check the artifact contents: schema id, checkpoint count, and one named checkpoint.
+const performance = JSON.parse(await readFile(performancePath, "utf8"))
+assert.equal(performance.schema, "wp-codebox/browser-performance/v1")
+assert.ok(performance.checkpoints.length >= 3, "performance artifact should include probe checkpoints")
+assert.match(await readFile(checkpointsPath, "utf8"), /"name":"after-navigation"/)
+
+console.log(`Recipe browser bench metrics smoke passed: ${artifactDirectory}`)
+
+/**
+ * Runs the current Node executable with `args` from the repository root and
+ * returns its stdout parsed as JSON.
+ *
+ * Waits for `close` rather than `exit`: `exit` can fire while the stdio pipes
+ * still hold unread data, which would hand truncated output to `JSON.parse`.
+ * A failure to spawn rejects the returned promise instead of surfacing as an
+ * unhandled `error` event.
+ *
+ * @param args - Arguments for the Node executable; the first is the script path.
+ * @returns The JSON document the CLI printed on stdout.
+ * @throws AssertionError when the exit code is not 0; the message carries
+ *   both captured streams.
+ */
+async function runCli(args: string[]): Promise<any> {
+  const child = spawn(process.execPath, args, {
+    cwd: repoRoot,
+    stdio: ["ignore", "pipe", "pipe"],
+  })
+
+  // Decode at the stream level so a multi-byte character split across two
+  // chunks is reassembled instead of being corrupted by per-chunk toString().
+  child.stdout.setEncoding("utf8")
+  child.stderr.setEncoding("utf8")
+
+  let stdout = ""
+  let stderr = ""
+  child.stdout.on("data", (chunk) => {
+    stdout += chunk
+  })
+  child.stderr.on("data", (chunk) => {
+    stderr += chunk
+  })
+
+  const exitCode = await new Promise<number | null>((resolveExit, rejectExit) => {
+    child.once("error", rejectExit)
+    child.once("close", (code) => resolveExit(code))
+  })
+  assert.equal(exitCode, 0, `CLI exited with ${exitCode}\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}`)
+  return JSON.parse(stdout)
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -635,7 +635,7 @@ npm run wp-codebox -- recipe-run \ @@
       --json
     ```
-    Each workload file returns a callable. The callable may return numeric metrics directly or a payload with `metrics` and `metadata` keys. The recipe output reports duration percentiles, custom metric aggregates, peak memory, runtime artifacts, and the parsed `benchResults` object in JSON output when a single `wordpress.bench` step runs.
+    Each workload file returns a callable. The callable may return numeric metrics directly or a payload with `metrics` and `metadata` keys. The recipe output reports duration percentiles, custom metric aggregates, peak memory, runtime artifacts, and the parsed `benchResults` object in JSON output when a single `wordpress.bench` step runs. If earlier `wordpress.browser-probe` steps in the same recipe captured generic `performance` or `memory` artifacts, `wordpress.bench` promotes selected numeric browser values into each scenario's metrics using `browser_*` names, while the raw browser artifacts remain available under `files/browser/`.
     ### `agent-runtime-probe`
@@ Expand Down @@