Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatible with facebook HHVM 3.2 #52

Merged
merged 1 commit into from
Jun 26, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ php:
- 5.3
- 5.4
- 5.5
- hhvm-nightly

notifications:
irc: "irc.freenode.net#masterminds"
Expand Down
1 change: 1 addition & 0 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
</testsuites>
<filter>
<blacklist>
<file>systemlib.phpreflection_hni</file>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not all that familiar with HHVM. What is systemlib.phpreflection_hni and why is it blacklisted but not part of the repo?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is a phpunit/codecoverage limitation

I have commented it here https://github.com/goetas/html5-php/commit/f902b05cc252721e5440f3437d0c5de3e0a8d789#comments

It could blacklisted by default, but it is a @sebastianbergmann issue

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course I could take care of this in PHP_CodeCoverage. But IMHO it should be taken care of in HHVM. HHVM's code coverage API should not expose data for systemlib*.

@ptarjan @sgolemon

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we can (and should) fix it. Can you cook up a small example and submit an issue please?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sebastianbergmann @ptarjan I've opened an issue here: facebook/hhvm#2992

<file>src/HTML5/Parser/InputStream.php</file>
<file>src/HTML5/Serializer/RulesInterface.php</file>
<file>src/HTML5/Entities.php</file>
Expand Down
10 changes: 3 additions & 7 deletions src/HTML5.php
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,9 @@ public function parse(\Masterminds\HTML5\Parser\InputStream $input)
$parser = new Tokenizer($scanner, $events);

$parser->parse();
$this->errors = $events->getErrors();

$document = $events->document();

if ($document) {
$this->errors = $document->errors;
}

return $document;
return $events->document();
}

/**
Expand All @@ -186,6 +181,7 @@ public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input)
$parser = new Tokenizer($scanner, $events);

$parser->parse();
$this->errors = $events->getErrors();

return $events->fragment();
}
Expand Down
23 changes: 19 additions & 4 deletions src/HTML5/Parser/DOMTreeBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ class DOMTreeBuilder implements EventHandler
*/
protected $quirks = true;

protected $errors = array();

public function __construct($isFragment = false, array $options = array())
{
$this->options = $options;
Expand All @@ -156,7 +158,7 @@ public function __construct($isFragment = false, array $options = array())
$dt = $impl->createDocumentType('html');
// $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
$this->doc = $impl->createDocument(null, null, $dt);
$this->doc->errors = array();
$this->errors = array();

$this->current = $this->doc; // ->documentElement;

Expand Down Expand Up @@ -195,7 +197,6 @@ public function document()
*/
public function fragment()
{
$this->frag->errors = $this->doc->errors;
return $this->frag;
}

Expand Down Expand Up @@ -337,6 +338,9 @@ public function startTag($name, $attributes = array(), $selfClosing = false)
// to avoid spl_object_hash collisions whe have to avoid garbage collection of $ele storing it into $pushes
// see https://bugs.php.net/bug.php?id=67459
$this->pushes[spl_object_hash($ele)] = array($pushes, $ele);

// SEE https://github.com/facebook/hhvm/issues/2962
$ele->setAttribute('html5-php-fake-id-attribute', spl_object_hash($ele));
}

foreach ($attributes as $aName => $aVal) {
Expand Down Expand Up @@ -438,7 +442,13 @@ public function endTag($name)
return;
}

$cid = spl_object_hash($this->current);
// https://github.com/facebook/hhvm/issues/2962
if ($cid = $this->current->getAttribute('html5-php-fake-id-attribute')) {
$this->current->removeAttribute('html5-php-fake-id-attribute');
} else {
$cid = spl_object_hash($this->current);
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spacing appears to be off in characters surrounding the if/else.

// remove the namespaced definded by current node
if (isset($this->pushes[$cid])) {
for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
Expand Down Expand Up @@ -501,7 +511,12 @@ public function eof()

public function parseError($msg, $line = 0, $col = 0)
{
$this->doc->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
$this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
}

public function getErrors()
{
return $this->errors;
}

public function cdata($data)
Expand Down
29 changes: 20 additions & 9 deletions src/HTML5/Serializer/OutputRules.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface

const IM_IN_MATHML = 3;

private $hasHTML5 = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this meant to be private or should it be protected?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is just a little hack to avoid defined('ENT_HTML5') && !defined('HHVM_VERSION') check for each enc() call.
Since it has only in-class caching propose, i prefer to avoid exposing another possible dependency.

public props = dependency
protected props = dependency (...)
private props = no-dependency


protected $traverser;

protected $encode = false;
Expand All @@ -40,6 +42,9 @@ public function __construct($output, $options = array())

$this->outputMode = static::IM_IN_HTML;
$this->out = $output;

// If HHVM, see https://github.com/facebook/hhvm/issues/2727
$this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
}

public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
Expand Down Expand Up @@ -83,15 +88,20 @@ public function element($ele)
}

$this->openTag($ele);
if (Elements::isA($name, Elements::TEXT_RAW)) {
foreach ($ele->childNodes as $child) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't iterating over the child nodes be done by the traverser? Why was it moved into the output rules? I'm also curious what TEXT_RAW is a special case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<script>
//<![CDATA[
alert(1)
//]]>
</script>

$ele->data contains just:

//
alert(1)
//

$ele>childNodes->length = 3

//
<![CDATA[
alert(1)
//]]>

$this->wr($child->data);
}
} else {
// Handle children.
if ($ele->hasChildNodes()) {
$this->traverser->children($ele->childNodes);
}

// Handle children.
if ($ele->hasChildNodes()) {
$this->traverser->children($ele->childNodes);
}

// Close out the SVG or MathML special handling.
if ($name == 'svg' || $name == 'math') {
$this->outputMode = static::IM_IN_HTML;
// Close out the SVG or MathML special handling.
if ($name == 'svg' || $name == 'math') {
$this->outputMode = static::IM_IN_HTML;
}
}

// If not unary, add a closing tag.
Expand Down Expand Up @@ -285,7 +295,8 @@ protected function enc($text, $attribute = false)

// If we are in PHP 5.4+ we can use the native html5 entity functionality to
// convert the named character references.
if (defined('ENT_HTML5')) {

if ($this->hasHTML5) {
return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
} // If a version earlier than 5.4 html5 entities are not entirely handled.
// This manually handles them.
Expand Down
16 changes: 8 additions & 8 deletions test/HTML5/Parser/DOMTreeBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
{

protected $errors = array();
/**
* Convenience function for parsing.
*/
Expand All @@ -27,6 +27,7 @@ protected function parse($string, array $options = array())
$parser = new Tokenizer($scanner, $treeBuilder);

$parser->parse();
$this->errors = $treeBuilder->getErrors();

return $treeBuilder->document();
}
Expand All @@ -42,6 +43,7 @@ protected function parseFragment($string)
$parser = new Tokenizer($scanner, $treeBuilder);

$parser->parse();
$this->errors = $treeBuilder->getErrors();

return $treeBuilder->fragment();
}
Expand Down Expand Up @@ -153,12 +155,10 @@ public function testXmlNamespaceNesting()
<c id="bar2" xmlns="bar2"></c>
<div id="div"></div>
<d id="bar3"></d>

</body>
</html>', array(
'xmlNamespaces' => true
));

$div = $dom->getElementById('div');
$this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI);

Expand Down Expand Up @@ -307,7 +307,7 @@ public function testText()
$html = "<!DOCTYPE html><html>
Foo<head></head><body></body></html>";
$doc = $this->parse($html);
$this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]);
$this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]);
$headElement = $doc->documentElement->firstChild;
$this->assertEquals('head', $headElement->tagName);
}
Expand All @@ -319,8 +319,8 @@ public function testParseErrors()

// We're JUST testing that we can access errors. Actual testing of
// error messages happen in the Tokenizer's tests.
$this->assertGreaterThan(0, count($doc->errors));
$this->assertTrue(is_string($doc->errors[0]));
$this->assertGreaterThan(0, count($this->errors));
$this->assertTrue(is_string($this->errors[0]));
}

public function testProcessingInstruction()
Expand Down Expand Up @@ -419,7 +419,7 @@ public function testNoScript()
{
$html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
$doc = $this->parse($html);
$this->assertEmpty($doc->errors);
$this->assertEmpty($this->errors);
$noscript = $doc->getElementsByTagName('noscript')->item(0);
$this->assertEquals('noscript', $noscript->tagName);
}
Expand All @@ -433,7 +433,7 @@ public function testRegressionHTMLNoBody()
$doc = $this->parse($html);
$span = $doc->getElementById('test');

$this->assertEmpty($doc->errors);
$this->assertEmpty($this->errors);

$this->assertEquals('span', $span->tagName);
$this->assertEquals('Test', $span->textContent);
Expand Down
6 changes: 3 additions & 3 deletions test/HTML5/Serializer/OutputRulesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -330,14 +330,14 @@ public function testText()
<html lang="en">
<head id="foo"></head>
</html>');
$dom->getElementById('foo')->appendChild(new \DOMText('<script>alert("hi");</script>'));
$foo = $dom->getElementById('foo');
$foo->appendChild(new \DOMText('<script>alert("hi");</script>'));

$stream = fopen('php://temp', 'w');
$r = new OutputRules($stream, $this->html5->getOptions());
$t = new Traverser($dom, $stream, $r, $this->html5->getOptions());

$item = $dom->getElementById('foo');
$r->text($item->firstChild);
$r->text($foo->firstChild);
$this->assertEquals('&lt;script&gt;alert("hi");&lt;/script&gt;', stream_get_contents($stream, - 1, 0));
}

Expand Down