From 4b6d5a4314c6188427bb43616102bc75a6c82bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 16 May 2026 10:13:56 +0200 Subject: [PATCH] Add focused COW WordPress semantic validator gate --- Makefile | 6 +- docs/merge-reliability.md | 11 +- tests/cow/wp_semantic_validator.php | 214 ++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 tests/cow/wp_semantic_validator.php diff --git a/Makefile b/Makefile index c0c1c018..e50153af 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ else ifeq ($(UNAME_S)-$(UNAME_M),Linux-aarch64) FORKPRESS_TARGET ?= aarch64-unknown-linux-musl endif -.PHONY: all clean test test-compat test-branchfs test-cow test-cow-branch-birth test-cow-explicit-ids test-cow-fast test-cow-filesystem test-cow-git-server test-cow-id-bands test-cow-media-validator test-cow-merge test-cow-merge-smoke test-cow-plugin-validator test-cow-schema-review test-cow-stale-audit test-release init-db test-all forkpress forkpress-dev dist dist-dev +.PHONY: all clean test test-compat test-branchfs test-cow test-cow-branch-birth test-cow-explicit-ids test-cow-fast test-cow-filesystem test-cow-git-server test-cow-id-bands test-cow-media-validator test-cow-merge test-cow-merge-smoke test-cow-plugin-validator test-cow-schema-review test-cow-stale-audit test-cow-wp-semantic-validator test-release init-db test-all forkpress forkpress-dev dist dist-dev all: $(BRANCHFS_EXT_SO) @@ -134,6 +134,9 @@ test-cow-plugin-validator: test-cow-stale-audit: php $(COW_TEST_DIR)/stale_audit.php +test-cow-wp-semantic-validator: + php $(COW_TEST_DIR)/wp_semantic_validator.php + test-cow-schema-review: php $(COW_TEST_DIR)/schema_review.php @@ -146,6 +149,7 @@ test-cow-fast: test-cow-git-server test-cow-merge-smoke php $(COW_TEST_DIR)/plugin_validator.php php $(COW_TEST_DIR)/schema_review.php php $(COW_TEST_DIR)/stale_audit.php + php $(COW_TEST_DIR)/wp_semantic_validator.php php $(COW_TEST_DIR)/branch_ui.php php $(COW_TEST_DIR)/router_paths.php php $(COW_TEST_DIR)/router_lock.php diff --git a/docs/merge-reliability.md b/docs/merge-reliability.md index 86a3cf07..ad8845e1 100644 --- a/docs/merge-reliability.md +++ b/docs/merge-reliability.md @@ -17,7 +17,7 @@ when there is a test or document that exercises the specific merge invariant. | Objective item | Evidence in this PR | Remaining gap | | --- | --- | --- | -| 1. Real WordPress semantic merge coverage | `tests/cow/e2e.sh` creates source and target branches through runtime WordPress requests, validates each branch-local graph before merge, then merges pages, branch-local page edits/deletes with edited content and authors, postmeta, users/usermeta, authors, comments/commentmeta, hierarchical taxonomy terms, nav menus and menu locations, reusable `wp_block` rows, page-to-reusable-block refs, `core/image` block refs and featured-image refs to media attachments, options and JSON options with branch user/object IDs, media uploads with attachment parents plus generated-size metadata/files, a CPT-like `forkpress_note`, and plugin-shaped custom tables/files. The semantic E2E merge now requires `status: completed` and a zero-conflict merge run, so runtime-only state cannot hide behind a surviving object graph. `tests/cow/merge.php` adds deterministic WordPress row fingerprint and validator coverage. | Add broader concurrent edit/delete matrices for complete WP objects and deterministic repair policies only where the owner object is unambiguous. | +| 1. Real WordPress semantic merge coverage | `tests/cow/e2e.sh` creates source and target branches through runtime WordPress requests, validates each branch-local graph before merge, then merges pages, branch-local page edits/deletes with edited content and authors, postmeta, users/usermeta, authors, comments/commentmeta, hierarchical taxonomy terms, nav menus and menu locations, reusable `wp_block` rows, page-to-reusable-block refs, `core/image` block refs and featured-image refs to media attachments, options and JSON options with branch user/object IDs, media uploads with attachment parents plus generated-size metadata/files, a CPT-like `forkpress_note`, and plugin-shaped custom tables/files. The semantic E2E merge now requires `status: completed` and a zero-conflict merge run, so runtime-only state cannot hide behind a surviving object graph. `tests/cow/wp_semantic_validator.php` is a focused fast gate for discovered WordPress semantic validators that catch pages left pointing at deleted reusable blocks or synced patterns. `tests/cow/merge.php` adds deterministic WordPress row fingerprint and validator coverage. | Add broader concurrent edit/delete matrices for complete WP objects and deterministic repair policies only where the owner object is unambiguous. | | 2. Plugin-specific merge semantics | `docs/plugin-merge-validators.md` defines the validator contract, including rejecting contradictory status/finding output. `scripts/cow/merge.php` discovers active plugin and mu-plugin validators, runs explicit validators, records plugin-scoped conflicts, and rolls back inline validator failures. `tests/cow/plugin_validator.php` is a focused fast gate for discovered validator review of a plugin-owned DB/JSON/file graph, plugin-scoped audit output for incoherent JSON and missing file references, identical validator rerun dedupe, contradictory validator output rejection, and replacement-evidence revalidation when validator findings change after review. `tests/cow/merge.php` covers clean custom-table graph merges, validator findings, audit/review grouping, validator rerun evidence, file-root context, active-plugin discovery, explicit-ID plugin graph validation, contradictory validator output rejection, and failed-validator rollback. `tests/cow/e2e.sh` covers a runtime plugin-shaped graph across custom table parent/child rows, child JSON payload refs, JSON, serialized data, options, postmeta, CPT data, and branch-owned file contents. | Add validators for real plugins and add merge drivers only for plugin-owned repairs that can prove correctness. | | 3. Remaining review-only schema cases | `scripts/cow/merge.php` validates source-added views/triggers, preserves invalid dependency cases as conflicts, and supports safe schema object resolution for deterministic subsets. `tests/cow/schema_review.php` is a focused fast gate proving acyclic source-added dependent views apply in dependency order, cyclic source-added views/triggers stay reviewable, and source-added triggers with missing target dependencies stay gated until the dependency is restored. `tests/cow/merge.php` covers broader cyclic/invalid view and trigger dependency handling, source-added dependent view ordering, and rebuild validation cases. | Improve dependency planning for more safe reorderings. Cyclic or semantically ambiguous cases should stay review-only. | | 4. Filesystem merge hardening | `tests/cow/filesystem.php` is a focused fast gate for safe source text/binary file application, safe relative symlink changes/additions, unsafe absolute symlinks staying as auditable file conflicts, and directory/file type replacements staying review-held until an explicit audited source resolution applies them. `tests/cow/media_validator.php` fast-gates discovered upload validators for incomplete generated-size metadata, missing original upload files, duplicate upload ownership, and `_wp_attached_file` versus `_wp_attachment_metadata['file']` drift. `tests/cow/merge.php` covers file adds/deletes/conflicts, binary hash comparisons, symlink safety, directory/file and file/directory replacement review, rollback artifacts, upload-file validators, generated attachment file checks, original/generated dimension drift, generated-size filename drift, featured-image/image-block/media metadata drift, and unsafe metadata paths. `tests/cow/e2e.sh` verifies real merged upload originals and generated thumbnails. | Add stricter uploads-specific validators for more drift shapes and explicit attachment-regeneration decisions. | @@ -48,7 +48,7 @@ when there is a test or document that exercises the specific merge invariant. | Area | Current state | Missing reliability work | | --- | --- | --- | -| WordPress semantic objects | Tests cover real post creation, postmeta references, users, usermeta, post/comment authors, threaded comments and commentmeta references, branch-local page edits/deletes with edited content/author assertions, same-object page/postmeta edit-vs-delete conflicts with auditable target-wins defaults, attachment uploads plus original and generated-size files, attachment metadata, attachment-to-page parent links, `core/image` block references and featured-image postmeta references to media attachments, hierarchical taxonomy terms, page-linked nav menus with menu-location assignments, reusable blocks and synced patterns, options with embedded object IDs including branch user refs, JSON option payloads with embedded object IDs including branch user refs, custom post types, plugin AUTOINCREMENT tables, keyless plugin tables, unique collisions, file additions, nested plugin-owned custom-table/JSON/serialized/file graphs, branch merge visibility, a clean zero-conflict semantic E2E merge requirement, a discovered media validator that reports missing original/generated upload files, duplicate attachment claims on the same upload file including same-attachment generated-file duplicates, unreadable or NUL-corrupted attachment metadata, empty or unsafe primary/generated upload metadata paths, original/generated dimension drift, generated-size filename drift, incomplete generated-size metadata, and `_wp_attached_file` versus `_wp_attachment_metadata` file drift, a discovered block-reference validator that reports pages/posts left pointing at deleted reusable blocks or synced patterns, a discovered menu-reference validator that reports nav menu items left pointing at deleted post objects, a discovered option-reference validator that reports serialized theme mods left pointing at deleted post objects, deleted nav-menu terms, or deleted custom-logo attachments plus serialized nav menu widgets, serialized media-image widgets, serialized sidebar-widget placements, scalar `site_icon`/`page_on_front`/`page_for_posts` options, and serialized `sticky_posts` options left pointing at deleted objects, a discovered featured-image validator that reports `_thumbnail_id` postmeta left pointing at deleted attachment objects/files, a discovered image-block validator that reports `core/image` block JSON left pointing at deleted attachment objects/files, a discovered term-relationship validator that reports `wp_term_relationships` left pointing at deleted taxonomy term rows, and `docs/merge-repair-policy.md` defines when semantic repairs must remain review-only. | Add broader concurrent object matrices, implement only the repair policies that have deterministic owners, and broaden plugin-owned graph conflict/drift cases. | +| WordPress semantic objects | Tests cover real post creation, postmeta references, users, usermeta, post/comment authors, threaded comments and commentmeta references, branch-local page edits/deletes with edited content/author assertions, same-object page/postmeta edit-vs-delete conflicts with auditable target-wins defaults, attachment uploads plus original and generated-size files, attachment metadata, attachment-to-page parent links, `core/image` block references and featured-image postmeta references to media attachments, hierarchical taxonomy terms, page-linked nav menus with menu-location assignments, reusable blocks and synced patterns, options with embedded object IDs including branch user refs, JSON option payloads with embedded object IDs including branch user refs, custom post types, plugin AUTOINCREMENT tables, keyless plugin tables, unique collisions, file additions, nested plugin-owned custom-table/JSON/serialized/file graphs, branch merge visibility, a clean zero-conflict semantic E2E merge requirement, a discovered media validator that reports missing original/generated upload files, duplicate attachment claims on the same upload file including same-attachment generated-file duplicates, unreadable or NUL-corrupted attachment metadata, empty or unsafe primary/generated upload metadata paths, original/generated dimension drift, generated-size filename drift, incomplete generated-size metadata, and `_wp_attached_file` versus `_wp_attachment_metadata` file drift, a fast discovered block-reference validator that reports pages/posts left pointing at deleted reusable blocks or synced patterns, a discovered menu-reference validator that reports nav menu items left pointing at deleted post objects, a discovered option-reference validator that reports serialized theme mods left pointing at deleted post objects, deleted nav-menu terms, or deleted custom-logo attachments plus serialized nav menu widgets, serialized media-image widgets, serialized sidebar-widget placements, scalar `site_icon`/`page_on_front`/`page_for_posts` options, and serialized `sticky_posts` options left pointing at deleted objects, a discovered featured-image validator that reports `_thumbnail_id` postmeta left pointing at deleted attachment objects/files, a discovered image-block validator that reports `core/image` block JSON left pointing at deleted attachment objects/files, a discovered term-relationship validator that reports `wp_term_relationships` left pointing at deleted taxonomy term rows, and `docs/merge-repair-policy.md` defines when semantic repairs must remain review-only. | Add broader concurrent object matrices, implement only the repair policies that have deterministic owners, and broaden plugin-owned graph conflict/drift cases. | | Plugin-specific semantics | Generic SQLite merge is table/row/cell based and does not rewrite embedded IDs. `docs/plugin-merge-validators.md` defines the validator boundary and first test shape. PHP unit and E2E coverage now cover the clean happy path for a plugin-owned custom-table graph with parent/child rows, child JSON payload references, serialized option/postmeta references, referenced CPT data, and a referenced file. The PHP unit suite also covers the metadata/audit foundation for plugin-scoped validator conflicts, including review queues and grouping. Normal branch merges discover validators from active plugin and mu-plugin locations in the staged candidate target; discovered custom-table graph validators can abort and roll back a candidate with a broken JSON reference, or complete the merge with plugin-scoped review conflicts for broken serialized graph row/file references and target-conflicting graph state. `forkpress branch run-plugin-validator`, `forkpress branch record-plugin-validator-conflicts`, and `forkpress branch merge --plugin-validator ` expose explicit validator execution and findings recording, while rejecting contradictory valid-with-findings output before it becomes conflict metadata. Validator failures after DB/files have staged roll back the merge. | Add broader plugin-owned validators for more real plugins and plugin merge drivers only where a plugin can prove an automatic repair is safe. | | Review-only schema cases | Cyclic views/triggers, source-added triggers with unresolved dependencies, invalid preserved trigger/view dependencies, and some rebuild dependency chains are held as auditable conflicts. | Improve dependency planning so more safe schema reorderings can apply automatically. Keep non-deterministic or semantically ambiguous cases review-only. | | Filesystem semantics | File additions/deletions/conflicts are audited; binary file changes/conflicts are hash-verified, safe relative symlinks can merge, unsafe symlinks to absolute paths, root-escaping paths, self-references, and ForkPress-managed paths remain conflicts, directory/file and file/directory replacements get type-specific review conflicts, unchanged target descendants and source descendants under reviewed replacements are held until review, reviewed source replacements can apply supported file/dir/symlink changes including directory subtrees, WordPress E2E links attachment rows to original and generated-size upload files, plugin-shaped E2E checks branch-owned file contents, and PHP coverage uses a discovered validator to cross-check attachment metadata against merged upload files, attached-file metadata drift, and duplicate upload ownership. | Add stricter uploads-specific validators for more conflict/drift shapes, including attachment metadata regeneration decisions. | @@ -138,6 +138,13 @@ generated-size metadata, run: make test-cow-media-validator ``` +For WordPress semantic reference validators, including pages left pointing at +deleted reusable blocks or synced patterns, run: + +```bash +make test-cow-wp-semantic-validator +``` + For plugin-owned DB/JSON/file graph validator changes, including validator runner contract checks and rerun replacement evidence, run: diff --git a/tests/cow/wp_semantic_validator.php b/tests/cow/wp_semantic_validator.php new file mode 100644 index 00000000..a36f31ce --- /dev/null +++ b/tests/cow/wp_semantic_validator.php @@ -0,0 +1,214 @@ +isDir() && !$entry->isLink() ? rmdir($entry->getPathname()) : unlink($entry->getPathname()); + } + rmdir($path); +} + +function copy_tree_for_test(string $source, string $dest): void { + mkdir($dest, 0777, true); + $it = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator($source, RecursiveDirectoryIterator::SKIP_DOTS), + RecursiveIteratorIterator::SELF_FIRST + ); + foreach ($it as $entry) { + $target = $dest . '/' . str_replace(DIRECTORY_SEPARATOR, '/', substr($entry->getPathname(), strlen($source) + 1)); + if ($entry->isDir()) { + if (!is_dir($target)) { + mkdir($target, 0777, true); + } + continue; + } + if (!is_dir(dirname($target))) { + mkdir(dirname($target), 0777, true); + } + copy($entry->getPathname(), $target); + } +} + +function write_test_file(string $path, string $contents): void { + if (!is_dir(dirname($path))) { + mkdir(dirname($path), 0777, true); + } + file_put_contents($path, $contents); +} + +function open_db(string $path): SQLite3 { + $db = new SQLite3($path); + $db->busyTimeout(5000); + return $db; +} + +function scalar(string $db_path, string $sql): mixed { + $db = open_db($db_path); + $value = $db->querySingle($sql); + $db->close(); + return $value; +} + +function create_wp_semantic_db(string $path): void { + $db = open_db($path); + $db->exec("CREATE TABLE wp_posts ( + ID INTEGER PRIMARY KEY AUTOINCREMENT, + post_title TEXT NOT NULL DEFAULT '', + post_content TEXT NOT NULL DEFAULT '', + post_status TEXT NOT NULL DEFAULT 'publish', + post_type TEXT NOT NULL DEFAULT 'post', + post_name TEXT NOT NULL DEFAULT '' + )"); + $db->exec('CREATE TABLE wp_postmeta (meta_id INTEGER PRIMARY KEY AUTOINCREMENT, post_id INTEGER NOT NULL, meta_key TEXT NOT NULL, meta_value TEXT NOT NULL)'); + $db->exec("INSERT INTO wp_posts (ID, post_title, post_content, post_status, post_type, post_name) VALUES + (30, 'Shared reusable block', '

Shared block

', 'publish', 'wp_block', 'shared-reusable-block'), + (31, 'Page with reusable block', '

Base page content

', 'publish', 'page', 'page-with-reusable-block'), + (32, 'Shared synced pattern', '

Shared synced pattern

', 'publish', 'wp_block', 'shared-synced-pattern'), + (33, 'Page with synced pattern', '

Base synced pattern content

', 'publish', 'page', 'page-with-synced-pattern')"); + $db->exec("INSERT INTO wp_postmeta (meta_id, post_id, meta_key, meta_value) VALUES (34, 32, 'wp_pattern_sync_status', 'synced')"); + $db->close(); +} + +define('FORKPRESS_COW_MERGE_TESTS', true); +require_once __DIR__ . '/../../scripts/cow/merge.php'; + +echo "=== COW WordPress semantic validator focused tests ===\n"; + +$tmp = sys_get_temp_dir() . '/forkpress-cow-wp-semantic-validator-' . getmypid() . '-' . bin2hex(random_bytes(4)); +mkdir($tmp, 0777, true); + +try { + $base_root = $tmp . '/base'; + $source_root = $tmp . '/source'; + $target_root = $tmp . '/target'; + $base = $base_root . '/wp-content/database/.ht.sqlite'; + $source = $source_root . '/wp-content/database/.ht.sqlite'; + $target = $target_root . '/wp-content/database/.ht.sqlite'; + $metadata = $tmp . '/.forkpress/cow/merge/wp-semantic-validator-metadata.sqlite'; + $file_base = $tmp . '/.forkpress/cow/merge/file-bases/wp-semantic-validator.json'; + + mkdir($base_root . '/wp-content/database', 0777, true); + create_wp_semantic_db($base); + write_test_file($base_root . '/wp-content/mu-plugins/forkpress-merge-validator.php', <<<'PHP' +query("SELECT ID, post_content FROM wp_posts WHERE post_type IN ('page', 'post', 'wp_template_part', 'wp_template')"); +$findings = []; +while ($row = $res->fetchArray(SQLITE3_ASSOC)) { + $content = (string)$row['post_content']; + if (!preg_match_all('/