From 45e0f801d896258022c6a70a7738b991cb19aafe Mon Sep 17 00:00:00 2001 From: Asmir Mustafic Date: Thu, 26 Jun 2014 15:19:38 +0200 Subject: [PATCH] Added compatibility with Facebook HHVM 3.2 --- .travis.yml | 1 + phpunit.xml.dist | 1 + src/HTML5.php | 10 +++----- src/HTML5/Parser/DOMTreeBuilder.php | 23 ++++++++++++++---- src/HTML5/Serializer/OutputRules.php | 29 ++++++++++++++++------- test/HTML5/Parser/DOMTreeBuilderTest.php | 16 ++++++------- test/HTML5/Serializer/OutputRulesTest.php | 6 ++--- 7 files changed, 55 insertions(+), 31 deletions(-) diff --git a/.travis.yml b/.travis.yml index af88acc..6adea14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ php: - 5.3 - 5.4 - 5.5 + - hhvm-nightly notifications: irc: "irc.freenode.net#masterminds" diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 8dff222..c344880 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -7,6 +7,7 @@ + systemlib.phpreflection_hni src/HTML5/Parser/InputStream.php src/HTML5/Serializer/RulesInterface.php src/HTML5/Entities.php diff --git a/src/HTML5.php b/src/HTML5.php index 16187df..0b2d368 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -163,14 +163,9 @@ public function parse(\Masterminds\HTML5\Parser\InputStream $input) $parser = new Tokenizer($scanner, $events); $parser->parse(); + $this->errors = $events->getErrors(); - $document = $events->document(); - - if ($document) { - $this->errors = $document->errors; - } - - return $document; + return $events->document(); } /** @@ -186,6 +181,7 @@ public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input) $parser = new Tokenizer($scanner, $events); $parser->parse(); + $this->errors = $events->getErrors(); return $events->fragment(); } diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index 731d1d8..b9e587f 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -145,6 +145,8 @@ class DOMTreeBuilder implements EventHandler */ protected $quirks = true; + protected $errors = array(); + public function __construct($isFragment = false, array $options = array()) { $this->options = $options; @@ -156,7 +158,7 @@ public function __construct($isFragment = false, array $options = array()) $dt = $impl->createDocumentType('html'); // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt); $this->doc = $impl->createDocument(null, null, $dt); - $this->doc->errors = array(); + $this->errors = array(); $this->current = $this->doc; // ->documentElement; @@ -195,7 +197,6 @@ public function document() */ public function fragment() { - $this->frag->errors = $this->doc->errors; return $this->frag; } @@ -337,6 +338,9 @@ public function startTag($name, $attributes = array(), $selfClosing = false) // to avoid spl_object_hash collisions whe have to avoid garbage collection of $ele storing it into $pushes // see https://bugs.php.net/bug.php?id=67459 $this->pushes[spl_object_hash($ele)] = array($pushes, $ele); + + // SEE https://github.com/facebook/hhvm/issues/2962 + $ele->setAttribute('html5-php-fake-id-attribute', spl_object_hash($ele)); } foreach ($attributes as $aName => $aVal) { @@ -438,7 +442,13 @@ public function endTag($name) return; } - $cid = spl_object_hash($this->current); + // https://github.com/facebook/hhvm/issues/2962 + if ($cid = $this->current->getAttribute('html5-php-fake-id-attribute')) { + $this->current->removeAttribute('html5-php-fake-id-attribute'); + } else { + $cid = spl_object_hash($this->current); + } + // remove the namespaced definded by current node if (isset($this->pushes[$cid])) { for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) { @@ -501,7 +511,12 @@ public function eof() public function parseError($msg, $line = 0, $col = 0) { - $this->doc->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg); + $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg); + } + + public function getErrors() + { + return $this->errors; } public function cdata($data) diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php index 2969383..ff8341e 100644 --- a/src/HTML5/Serializer/OutputRules.php +++ b/src/HTML5/Serializer/OutputRules.php @@ -22,6 +22,8 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface const IM_IN_MATHML = 3; + private $hasHTML5 = false; + protected $traverser; protected $encode = false; @@ -40,6 +42,9 @@ public function __construct($output, $options = array()) $this->outputMode = static::IM_IN_HTML; $this->out = $output; + + // If HHVM, see https://github.com/facebook/hhvm/issues/2727 + $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION'); } public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) @@ -83,15 +88,20 @@ public function element($ele) } $this->openTag($ele); + if (Elements::isA($name, Elements::TEXT_RAW)) { + foreach ($ele->childNodes as $child) { + $this->wr($child->data); + } + } else { + // Handle children. + if ($ele->hasChildNodes()) { + $this->traverser->children($ele->childNodes); + } - // Handle children. - if ($ele->hasChildNodes()) { - $this->traverser->children($ele->childNodes); - } - - // Close out the SVG or MathML special handling. - if ($name == 'svg' || $name == 'math') { - $this->outputMode = static::IM_IN_HTML; + // Close out the SVG or MathML special handling. + if ($name == 'svg' || $name == 'math') { + $this->outputMode = static::IM_IN_HTML; + } } // If not unary, add a closing tag. @@ -285,7 +295,8 @@ protected function enc($text, $attribute = false) // If we are in PHP 5.4+ we can use the native html5 entity functionality to // convert the named character references. - if (defined('ENT_HTML5')) { + + if ($this->hasHTML5) { return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false); } // If a version earlier than 5.4 html5 entities are not entirely handled. // This manually handles them. diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index 0046d88..2745395 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -15,7 +15,7 @@ */ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase { - + protected $errors = array(); /** * Convenience function for parsing. */ @@ -27,6 +27,7 @@ protected function parse($string, array $options = array()) $parser = new Tokenizer($scanner, $treeBuilder); $parser->parse(); + $this->errors = $treeBuilder->getErrors(); return $treeBuilder->document(); } @@ -42,6 +43,7 @@ protected function parseFragment($string) $parser = new Tokenizer($scanner, $treeBuilder); $parser->parse(); + $this->errors = $treeBuilder->getErrors(); return $treeBuilder->fragment(); } @@ -153,12 +155,10 @@ public function testXmlNamespaceNesting()
- ', array( 'xmlNamespaces' => true )); - $div = $dom->getElementById('div'); $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI); @@ -307,7 +307,7 @@ public function testText() $html = " Foo"; $doc = $this->parse($html); - $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]); + $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]); $headElement = $doc->documentElement->firstChild; $this->assertEquals('head', $headElement->tagName); } @@ -319,8 +319,8 @@ public function testParseErrors() // We're JUST testing that we can access errors. Actual testing of // error messages happen in the Tokenizer's tests. - $this->assertGreaterThan(0, count($doc->errors)); - $this->assertTrue(is_string($doc->errors[0])); + $this->assertGreaterThan(0, count($this->errors)); + $this->assertTrue(is_string($this->errors[0])); } public function testProcessingInstruction() @@ -419,7 +419,7 @@ public function testNoScript() { $html = ''; $doc = $this->parse($html); - $this->assertEmpty($doc->errors); + $this->assertEmpty($this->errors); $noscript = $doc->getElementsByTagName('noscript')->item(0); $this->assertEquals('noscript', $noscript->tagName); } @@ -433,7 +433,7 @@ public function testRegressionHTMLNoBody() $doc = $this->parse($html); $span = $doc->getElementById('test'); - $this->assertEmpty($doc->errors); + $this->assertEmpty($this->errors); $this->assertEquals('span', $span->tagName); $this->assertEquals('Test', $span->textContent); diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php index a54a754..f12acbb 100644 --- a/test/HTML5/Serializer/OutputRulesTest.php +++ b/test/HTML5/Serializer/OutputRulesTest.php @@ -330,14 +330,14 @@ public function testText() '); - $dom->getElementById('foo')->appendChild(new \DOMText('')); + $foo = $dom->getElementById('foo'); + $foo->appendChild(new \DOMText('')); $stream = fopen('php://temp', 'w'); $r = new OutputRules($stream, $this->html5->getOptions()); $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); - $item = $dom->getElementById('foo'); - $r->text($item->firstChild); + $r->text($foo->firstChild); $this->assertEquals('<script>alert("hi");</script>', stream_get_contents($stream, - 1, 0)); }