Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ npm run wp-codebox -- recipe-run \
--json
```

Each workload file returns a callable. The callable may return numeric metrics directly or a payload with `metrics` and `metadata` keys. The recipe output reports duration percentiles, custom metric aggregates, peak memory, runtime artifacts, and the parsed `benchResults` object in JSON output when a single `wordpress.bench` step runs.
Each workload file returns a callable. The callable may return numeric metrics directly or a payload with `metrics` and `metadata` keys. The recipe output reports duration percentiles, custom metric aggregates, peak memory, runtime artifacts, and the parsed `benchResults` object in JSON output when a single `wordpress.bench` step runs. If earlier `wordpress.browser-probe` steps in the same recipe captured generic `performance` or `memory` artifacts, `wordpress.bench` promotes selected numeric browser values into each scenario's metrics using `browser_*` names, while the raw browser artifacts remain available under `files/browser/`.

### `agent-runtime-probe`

Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"phpunit-diagnostic-artifact-smoke": "tsx scripts/phpunit-diagnostic-artifact-smoke.ts",
"plugin-check-normalization-smoke": "tsx scripts/plugin-check-normalization-smoke.ts",
"recipe-bench-smoke": "tsx scripts/recipe-bench-smoke.ts",
"recipe-browser-bench-metrics-smoke": "tsx scripts/recipe-browser-bench-metrics-smoke.ts",
"recipe-browser-smoke": "tsx scripts/recipe-browser-smoke.ts",
"browser-probe-artifact-smoke": "tsx scripts/browser-probe-artifact-smoke.ts",
"browser-actions-artifact-smoke": "tsx scripts/browser-actions-artifact-smoke.ts",
Expand All @@ -73,7 +74,7 @@
"browser-runtime-operation-smoke": "tsx scripts/browser-runtime-operation-smoke.ts",
"wp-codebox": "node packages/cli/dist/index.js",
"wordpress-plugin-smoke": "php tests/smoke-wordpress-plugin.php",
"check": "npm run build && npm run command-registry-smoke && npm run sandbox-tool-policy-smoke && npm run task-input-contract-smoke && npm run discovery-command-smoke && npm run theme-check-normalization-smoke && npm run agent-sandbox-code-smoke && npm run policy-validation-smoke && npm run workspace-policy-smoke && npm run wordpress-plugin-smoke && npm run browser-runtime-operation-smoke && npm run package-distribution-smoke && npm run artifact-bundle-verifier-smoke && npm run artifact-patch-git-apply-smoke && npm run artifact-contract-smoke && npm run external-adapter-contract-smoke && npm run runtime-episode-smoke && npm run runtime-snapshot-restore-smoke && npm run runtime-action-adapter-smoke && npm run core-phpunit-command-smoke && npm run plugin-check-normalization-smoke && npm run recipe-bench-smoke && npm run recipe-dry-run-smoke && npm run recipe-workflow-phases-smoke && npm run recipe-site-seed-smoke && npm run recipe-staged-files-smoke && npm run recipe-workspace-seed-excludes-smoke && npm run recipe-runtime-evidence-smoke && npm run recipe-interruption-artifacts-smoke && npm run recipe-heavyweight-plugin-runtime-smoke && npm run preview-options-contract-smoke && npm run preview-port-smoke && npm run preview-public-url-canonical-smoke && npm run preview-response-body-smoke && npm run boot-preview-smoke && npm run blueprint-validation-smoke && npm run browser-probe-artifact-smoke && npm run browser-actions-artifact-smoke && npm run wp-codebox -- run --mount ./examples/simple-plugin:/wordpress/wp-content/plugins/simple-plugin --command wordpress.run-php --arg code-file=./examples/simple-plugin/probe.php --artifacts ./artifacts --json"
"check": "npm run build && npm run command-registry-smoke && npm run sandbox-tool-policy-smoke && npm run task-input-contract-smoke && npm run discovery-command-smoke && npm run theme-check-normalization-smoke && npm run agent-sandbox-code-smoke && npm run policy-validation-smoke && npm run workspace-policy-smoke && npm run wordpress-plugin-smoke && npm run browser-runtime-operation-smoke && npm run package-distribution-smoke && npm run artifact-bundle-verifier-smoke && npm run artifact-patch-git-apply-smoke && npm run artifact-contract-smoke && npm run external-adapter-contract-smoke && npm run runtime-episode-smoke && npm run runtime-snapshot-restore-smoke && npm run runtime-action-adapter-smoke && npm run core-phpunit-command-smoke && npm run plugin-check-normalization-smoke && npm run recipe-bench-smoke && npm run recipe-browser-bench-metrics-smoke && npm run recipe-dry-run-smoke && npm run recipe-workflow-phases-smoke && npm run recipe-site-seed-smoke && npm run recipe-staged-files-smoke && npm run recipe-workspace-seed-excludes-smoke && npm run recipe-runtime-evidence-smoke && npm run recipe-interruption-artifacts-smoke && npm run recipe-heavyweight-plugin-runtime-smoke && npm run preview-options-contract-smoke && npm run preview-port-smoke && npm run preview-public-url-canonical-smoke && npm run preview-response-body-smoke && npm run boot-preview-smoke && npm run blueprint-validation-smoke && npm run browser-probe-artifact-smoke && npm run browser-actions-artifact-smoke && npm run wp-codebox -- run --mount ./examples/simple-plugin:/wordpress/wp-content/plugins/simple-plugin --command wordpress.run-php --arg code-file=./examples/simple-plugin/probe.php --artifacts ./artifacts --json"
},
"workspaces": [
"packages/*"
Expand Down
66 changes: 65 additions & 1 deletion packages/runtime-playground/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ interface BrowserProbeArtifact {
finalUrl: string
htmlSnapshot: boolean
memory?: BrowserProbeMemorySummary
metrics?: Record<string, number>
networkEvents: number
performance?: BrowserProbePerformanceSummary
replayability: BrowserProbeReplayability
Expand Down Expand Up @@ -1463,6 +1464,7 @@ class PlaygroundRuntime implements Runtime {
finalUrl,
htmlSnapshot: capture.has("html"),
...(memoryArtifact ? { memory: memoryArtifact.peak } : {}),
...(memoryArtifact || performanceArtifact ? { metrics: browserProbeBenchMetrics(memoryArtifact, performanceArtifact) } : {}),
networkEvents: network.length,
...(performanceArtifact ? { performance: performanceArtifact.peak } : {}),
replayability: browserProbeReplayability(capture),
Expand Down Expand Up @@ -1900,7 +1902,7 @@ class PlaygroundRuntime implements Runtime {
})
assertPlaygroundResponseOk("wordpress.bench", response)

return response.text
return promoteBrowserMetricsToBenchResults(response.text, this.browserProbes)
}

private async runPhpunit(spec: ExecutionSpec): Promise<string> {
Expand Down Expand Up @@ -3160,6 +3162,68 @@ function lastNumber(values: Array<number | null | undefined>): number | null {
return null
}

/**
 * Flattens a probe's memory and performance artifacts into the flat set of
 * `browser_*` numeric metrics.
 *
 * Heap figures are read from the memory artifact's `peak` summary; DOM,
 * resource and long-task figures are read from the performance artifact's
 * `final` summary. Any value that is missing (or null) is reported as `0`.
 *
 * @param memoryArtifact - Optional memory artifact of the probe.
 * @param performanceArtifact - Optional performance artifact of the probe.
 * @returns A record with every `browser_*` key populated.
 */
function browserProbeBenchMetrics(memoryArtifact?: BrowserProbeMemoryArtifact, performanceArtifact?: BrowserProbePerformanceArtifact): Record<string, number> {
  const peakMemory = memoryArtifact?.peak
  const finalPerformance = performanceArtifact?.final

  const metrics: Record<string, number> = {}

  // Heap usage only exists on the memory side.
  metrics.browser_peak_used_js_heap_bytes = peakMemory?.usedJSHeapSize.peak ?? 0
  metrics.browser_final_used_js_heap_bytes = peakMemory?.usedJSHeapSize.final ?? 0

  // Prefer the performance artifact's checkpoints, then the memory artifact's.
  metrics.browser_checkpoint_count = performanceArtifact?.checkpoints.length ?? memoryArtifact?.checkpoints.length ?? 0

  // DOM node count falls back to the memory summary when performance is absent.
  metrics.browser_dom_node_count = finalPerformance?.dom.nodes ?? peakMemory?.domNodes.final ?? 0
  metrics.browser_iframe_count = finalPerformance?.dom.iframes ?? 0

  metrics.browser_resource_count = finalPerformance?.resources.count ?? 0
  metrics.browser_transfer_size_bytes = finalPerformance?.resources.transferSizeBytes ?? 0

  metrics.browser_long_task_count = finalPerformance?.longTasks.count ?? 0
  metrics.browser_long_task_total_ms = finalPerformance?.longTasks.totalDurationMs ?? 0

  return metrics
}

/**
 * Merges the combined browser probe metrics into every scenario of a bench
 * result document.
 *
 * Browser metrics are spread after the scenario's own metrics, so a
 * `browser_*` key coming from the probes overrides a same-named scenario key.
 *
 * @param raw - Text returned by the bench run; expected to be a JSON object
 *   that may carry a `scenarios` array.
 * @param probes - Browser probe artifacts whose metrics should be promoted.
 * @returns The re-serialized (2-space indented, newline-terminated) document,
 *   or `raw` unchanged when there are no browser metrics to promote or when
 *   `raw` is not a JSON object.
 */
function promoteBrowserMetricsToBenchResults(raw: string, probes: BrowserProbeArtifact[]): string {
  const metrics = combinedBrowserBenchMetrics(probes)
  if (!metrics) {
    return raw
  }

  // Promotion is an enrichment step only: if the bench output cannot be
  // parsed, hand it back untouched (as happens when no probes ran) instead of
  // failing the whole bench command with a SyntaxError.
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return raw
  }

  // Valid JSON can still be `null` or a primitive; there is nothing to merge
  // into in that case, and property access on `null` would throw.
  if (!isRecord(parsed)) {
    return raw
  }

  const scenarios = Array.isArray(parsed.scenarios) ? parsed.scenarios : []
  for (const scenario of scenarios) {
    if (!isRecord(scenario)) {
      continue
    }

    scenario.metrics = {
      ...(isRecord(scenario.metrics) ? scenario.metrics : {}),
      ...metrics,
    }
  }

  return `${JSON.stringify(parsed, null, 2)}\n`
}

/**
 * Reduces the metrics of all browser probes to a single `browser_*` record.
 *
 * Aggregation per key:
 * - peak heap: maximum across probes;
 * - checkpoint and long-task figures: summed across probes;
 * - everything else: taken from the last probe that carries metrics.
 *
 * @param probes - Browser probe artifacts; probes without a metrics record in
 *   their summary are ignored.
 * @returns The combined record, or `undefined` when no probe carries metrics.
 */
function combinedBrowserBenchMetrics(probes: BrowserProbeArtifact[]): Record<string, number> | undefined {
  const hasMetrics = (candidate: unknown): candidate is Record<string, number> => isRecord(candidate)
  const metricSets = probes.map((probe) => probe.summary.metrics).filter(hasMetrics)
  if (metricSets.length === 0) {
    return undefined
  }

  const finalMetrics = metricSets[metricSets.length - 1] ?? {}
  const fromLast = (name: string): number => finalMetrics[name] ?? 0
  const summed = (name: string): number => sumMetric(metricSets, name)

  // Fold the maximum by hand; metricSets is non-empty here, so the -Infinity
  // seed never leaks into the result.
  const peakHeap = metricSets.reduce(
    (highest, metrics) => Math.max(highest, metrics.browser_peak_used_js_heap_bytes ?? 0),
    -Infinity,
  )

  return {
    browser_peak_used_js_heap_bytes: peakHeap,
    browser_final_used_js_heap_bytes: fromLast("browser_final_used_js_heap_bytes"),
    browser_checkpoint_count: summed("browser_checkpoint_count"),
    browser_dom_node_count: fromLast("browser_dom_node_count"),
    browser_iframe_count: fromLast("browser_iframe_count"),
    browser_resource_count: fromLast("browser_resource_count"),
    browser_transfer_size_bytes: fromLast("browser_transfer_size_bytes"),
    browser_long_task_count: summed("browser_long_task_count"),
    browser_long_task_total_ms: summed("browser_long_task_total_ms"),
  }
}

/**
 * Adds up one named metric across several metric records.
 *
 * @param metricSets - Metric records to scan.
 * @param name - Key of the metric to total.
 * @returns The sum of the metric; records lacking the key contribute `0`.
 */
function sumMetric(metricSets: Array<Record<string, number>>, name: string): number {
  let total = 0
  for (const metrics of metricSets) {
    total += metrics[name] ?? 0
  }
  return total
}

function serializeBrowserResponse(response: Response): BrowserProbeNetworkRecord {
const request = response.request()
return {
Expand Down
106 changes: 106 additions & 0 deletions scripts/recipe-browser-bench-metrics-smoke.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import assert from "node:assert/strict"
import { spawn } from "node:child_process"
import { existsSync } from "node:fs"
import { mkdir, readFile, rm, writeFile } from "node:fs/promises"
import { join, resolve } from "node:path"

// Smoke test: runs a recipe with a `wordpress.browser-probe` step followed by a
// `wordpress.bench` step through the built CLI, then checks that the bench
// scenario metrics carry the `browser_*` values and that the raw browser
// artifacts are still written under `files/browser/`.

const repoRoot = resolve(import.meta.dirname, "..")
const workspace = resolve(repoRoot, "artifacts", "recipe-browser-bench-metrics-smoke")
const recipePath = join(workspace, "recipe.json")
const artifactsRoot = join(workspace, "artifacts")

// The probe injects one iframe so the iframe count can be asserted exactly.
const probeStep = {
  command: "wordpress.browser-probe",
  args: [
    "url=/",
    "wait-for=load",
    "capture=performance,memory",
    "script=document.body.appendChild(document.createElement('iframe'));",
  ],
}

const benchStep = {
  command: "wordpress.bench",
  args: [
    "component-id=bench-plugin",
    "plugin-slug=bench-plugin",
    "iterations=1",
    "warmup=0",
  ],
}

const recipe = {
  schema: "wp-codebox/workspace-recipe/v1",
  inputs: {
    extraPlugins: [
      {
        source: resolve(repoRoot, "examples", "bench-plugin"),
        slug: "bench-plugin",
      },
    ],
  },
  workflow: {
    steps: [probeStep, benchStep],
  },
  artifacts: {
    directory: artifactsRoot,
  },
}

// Start from an empty workspace so files from earlier runs cannot satisfy the checks.
await rm(workspace, { recursive: true, force: true })
await mkdir(workspace, { recursive: true })
await writeFile(recipePath, `${JSON.stringify(recipe, null, 2)}\n`)

const output = await runCli([
  "packages/cli/dist/index.js",
  "recipe-run",
  "--recipe",
  recipePath,
  "--json",
])

assert.equal(output.success, true, output.error?.message ?? "recipe-run failed")
assert.ok(output.benchResults, "recipe-run should expose benchResults")
assert.equal(output.benchResults.scenarios.length, 1)

const { metrics } = output.benchResults.scenarios[0]
assert.ok(metrics.browser_checkpoint_count >= 3, "browser_checkpoint_count should include probe checkpoints")
assert.ok(metrics.browser_dom_node_count > 0, "browser_dom_node_count should be numeric")
assert.equal(metrics.browser_iframe_count, 1)

// These only need to be present as non-negative numbers; `undefined >= 0` is false.
const nonNegativeMetricNames = [
  "browser_resource_count",
  "browser_transfer_size_bytes",
  "browser_long_task_count",
  "browser_long_task_total_ms",
  "browser_peak_used_js_heap_bytes",
  "browser_final_used_js_heap_bytes",
]
for (const metricName of nonNegativeMetricNames) {
  assert.ok(metrics[metricName] >= 0, `${metricName} should be numeric`)
}

const artifactDirectory = output.artifacts.directory
const browserFilesDirectory = join(artifactDirectory, "files", "browser")
const performancePath = join(browserFilesDirectory, "performance.json")
const checkpointsPath = join(browserFilesDirectory, "checkpoints.jsonl")
assert.equal(existsSync(performancePath), true, "performance.json should remain available")
assert.equal(existsSync(checkpointsPath), true, "checkpoints.jsonl should remain available")

const performanceText = await readFile(performancePath, "utf8")
const performance = JSON.parse(performanceText)
assert.equal(performance.schema, "wp-codebox/browser-performance/v1")
assert.ok(performance.checkpoints.length >= 3, "performance artifact should include probe checkpoints")

const checkpointsText = await readFile(checkpointsPath, "utf8")
assert.match(checkpointsText, /"name":"after-navigation"/)

console.log(`Recipe browser bench metrics smoke passed: ${artifactDirectory}`)

/**
 * Runs the current Node executable with `args` from the repository root and
 * returns the child's stdout parsed as JSON.
 *
 * @param args - Arguments passed to `process.execPath` (script path first).
 * @returns The parsed JSON document the child wrote to stdout.
 * @throws AssertionError when the child exits with a non-zero code (the
 *   message includes the captured stdout and stderr); the spawn error when the
 *   process cannot be started; SyntaxError when stdout is not valid JSON.
 */
async function runCli(args: string[]): Promise<any> {
  const child = spawn(process.execPath, args, {
    cwd: repoRoot,
    stdio: ["ignore", "pipe", "pipe"],
  })

  // Decode inside the stream so a multi-byte UTF-8 sequence split across two
  // chunks is not corrupted by per-chunk Buffer#toString().
  child.stdout.setEncoding("utf8")
  child.stderr.setEncoding("utf8")

  let stdout = ""
  let stderr = ""
  child.stdout.on("data", (chunk: string) => {
    stdout += chunk
  })
  child.stderr.on("data", (chunk: string) => {
    stderr += chunk
  })

  const exitCode = await new Promise<number | null>((resolveExit, rejectExit) => {
    // Without a listener a spawn failure is raised as an unhandled "error" event.
    child.once("error", rejectExit)
    // "close" fires only after the stdio streams have ended, whereas "exit" can
    // fire while output is still buffered in the pipes, truncating stdout.
    child.once("close", (code) => resolveExit(code))
  })

  assert.equal(exitCode, 0, `CLI exited with ${exitCode}\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}`)
  return JSON.parse(stdout)
}