From fd3ef27e254645cac4d4f1dd3abbfc9c431592ce Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Fri, 18 Oct 2024 10:19:08 -0600 Subject: [PATCH 1/4] Add failing test for multiple-node raw sources --- tests/parser/sources/test-source-raw.php | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/parser/sources/test-source-raw.php b/tests/parser/sources/test-source-raw.php index f6d52b9..813d6bc 100644 --- a/tests/parser/sources/test-source-raw.php +++ b/tests/parser/sources/test-source-raw.php @@ -79,4 +79,32 @@ public function test_parse_raw_source__nested() { $this->assertArrayHasKey( 'blocks', $blocks, sprintf( 'Unexpected parser output: %s', wp_json_encode( $blocks ) ) ); $this->assertArraySubset( $expected_blocks, $blocks['blocks'], true ); } + + public function test_parse_raw_source_multiple_top_level_nodes() { + $this->register_block_with_attributes( 'test/html', [ + 'content' => [ + 'type' => 'string', + 'source' => 'raw', + ], + ] ); + + $html = ' + +

Node 1

Node 2

+ '; + + $expected_blocks = [ + [ + 'name' => 'test/html', + 'attributes' => [ + 'content' => '

Node 1

Node 2

', + ], + ], + ]; + + $content_parser = new ContentParser( $this->get_block_registry() ); + $blocks = $content_parser->parse( $html ); + $this->assertArrayHasKey( 'blocks', $blocks, sprintf( 'Unexpected parser output: %s', wp_json_encode( $blocks ) ) ); + $this->assertArraySubset( $expected_blocks, $blocks['blocks'], true ); + } } From 14858ee64cf015409c40ca2db552c3979b1e21f1 Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Fri, 18 Oct 2024 10:58:34 -0600 Subject: [PATCH 2/4] Add failing test for internal whitespace --- tests/parser/sources/test-source-raw.php | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/parser/sources/test-source-raw.php b/tests/parser/sources/test-source-raw.php index 813d6bc..59c60f5 100644 --- a/tests/parser/sources/test-source-raw.php +++ b/tests/parser/sources/test-source-raw.php @@ -107,4 +107,33 @@ public function test_parse_raw_source_multiple_top_level_nodes() { $this->assertArrayHasKey( 'blocks', $blocks, sprintf( 'Unexpected parser output: %s', wp_json_encode( $blocks ) ) ); $this->assertArraySubset( $expected_blocks, $blocks['blocks'], true ); } + + public function test_parse_raw_source_multiple_top_level_nodes_with_whitespace() { + $this->register_block_with_attributes( 'test/html', [ + 'content' => [ + 'type' => 'string', + 'source' => 'raw', + ], + ] ); + + $html = ' + + This should retain inner +whitespace + '; + + $expected_blocks = [ + [ + 'name' => 'test/html', + 'attributes' => [ + 'content' => "This should retain inner\nwhitespace", + ], + ], + ]; + + $content_parser = new ContentParser( $this->get_block_registry() ); + $blocks = $content_parser->parse( $html ); + $this->assertArrayHasKey( 'blocks', $blocks, sprintf( 'Unexpected parser output: %s', wp_json_encode( $blocks ) ) ); + $this->assertArraySubset( $expected_blocks, $blocks['blocks'], true ); + } } From b6be8dcb3d183fbfa4fd29ab8082f8ce95e3f61b Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Fri, 18 Oct 2024 10:59:05 -0600 Subject: [PATCH 3/4] Change `source_block_raw` to use parent body tag to preserve whitespace --- src/parser/content-parser.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser/content-parser.php b/src/parser/content-parser.php index c5c4e94..bfad2f2 100644 --- a/src/parser/content-parser.php +++ b/src/parser/content-parser.php @@ -680,10 +680,15 @@ protected function source_block_raw( $crawler ) { // Also see tag attribute parsing in Gutenberg: // https://github.com/WordPress/gutenberg/blob/6517008/packages/blocks/src/api/parser/get-block-attributes.js#L131 - $attribute_value = null; - if ( $crawler->count() > 0 ) { - $attribute_value = trim( $crawler->outerHtml() ); + // $crawler's outerHtml() will only return the HTML of the first node in this raw HTML. + // If the raw HTML contains multiple top-level nodes, we need to use the inner HTML of the wrapping + // 'body' tag. This will also preserve internal whitespace in the HTML. + $body_node = $crawler->closest( 'body' ); + + if ( $body_node && $body_node->count() > 0 ) { + $attribute_value = trim( $body_node->html() ); + } } return $attribute_value; From a4939bbe953548544e670e74207d1973d1ab4ae0 Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Fri, 18 Oct 2024 11:07:25 -0600 Subject: [PATCH 4/4] Fix removed default value --- src/parser/content-parser.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser/content-parser.php b/src/parser/content-parser.php index bfad2f2..7c8535b 100644 --- a/src/parser/content-parser.php +++ b/src/parser/content-parser.php @@ -680,6 +680,8 @@ protected function source_block_raw( $crawler ) { // Also see tag attribute parsing in Gutenberg: // https://github.com/WordPress/gutenberg/blob/6517008/packages/blocks/src/api/parser/get-block-attributes.js#L131 + $attribute_value = null; + if ( $crawler->count() > 0 ) { // $crawler's outerHtml() will only return the HTML of the first node in this raw HTML. // If the raw HTML contains multiple top-level nodes, we need to use the inner HTML of the wrapping