Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed infinite loop for char "&" in unquoted attribute #28

Merged
merged 2 commits into from
Feb 11, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions src/HTML5/Parser/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ protected function unquotedAttributeValue() {
while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
if ($tok == '&') {
$val .= $this->decodeCharacterReference(TRUE);
$tok = $this->scanner->current();
}
else {
if(strspn($tok, "\"'<=`") > 0) {
Expand Down Expand Up @@ -774,7 +775,7 @@ protected function cdataSection() {
*
* XML processing instructions are supposed to be ignored in HTML5,
* treated as "bogus comments". However, since we're not a user
* agent, we allow them. We consume until ?> and then issue an
* agent, we allow them. We consume until ?> and then issue an
* EventListener::processingInstruction() event.
*/
protected function processingInstruction() {
Expand Down Expand Up @@ -819,7 +820,7 @@ protected function processingInstruction() {
// ================================================================

/**
* Read from the input stream until we get to the desired sequence
* Read from the input stream until we get to the desired sequence
* or hit the end of the input stream.
*/
protected function readUntilSequence($sequence) {
Expand Down Expand Up @@ -849,11 +850,11 @@ protected function readUntilSequence($sequence) {
* This will read the stream for the $sequence. If it's
* found, this will return TRUE. If not, return FALSE.
* Since this unconsumes any chars it reads, the caller
* will still need to read the next sequence, even if
* will still need to read the next sequence, even if
* this returns TRUE.
*
* Example: $this->sequenceMatches('</script>') will
* see if the input stream is at the start of a
* see if the input stream is at the start of a
* '</script>' string.
*/
protected function sequenceMatches($sequence) {
Expand Down Expand Up @@ -902,7 +903,7 @@ protected function buffer($str) {
/**
* Emit a parse error.
*
* A parse error always returns FALSE because it never consumes any
* A parse error always returns FALSE because it never consumes any
* characters.
*/
protected function parseError($msg) {
Expand Down Expand Up @@ -1008,7 +1009,7 @@ protected function decodeCharacterReference($inAttribute = FALSE) {
return $entity;
}

// If in an attribute, then failing to match ; means unconsume the
// If in an attribute, then failing to match ; means unconsume the
// entire string. Otherwise, failure to match is an error.
if ($inAttribute) {
$this->scanner->unconsume($this->scanner->position() - $start);
Expand Down
12 changes: 7 additions & 5 deletions test/HTML5/Parser/TokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public function testCharacterReference() {
$e1 = $events->get(0);
$this->assertEquals('error', $e1['name']);

// FIXME: Once the text processor is done, need to verify that the
// FIXME: Once the text processor is done, need to verify that the
// tokens are transformed correctly into text.
}

Expand Down Expand Up @@ -139,12 +139,12 @@ public function testEndTag() {
$succeed = array(
'</a>' => 'a',
'</test>' => 'test',
'</test
'</test
>' => 'test',
'</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' =>
'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
// See 8.2.4.10, which requires this and does not say error.
'</a<b>' => 'a<b',
'</a<b>' => 'a<b',
);
$this->isAllGood('endTag', 2, $succeed);

Expand Down Expand Up @@ -271,8 +271,8 @@ public function testDoctype() {
public function testProcessorInstruction() {
$good = array(
'<?hph ?>' => 'hph',
'<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
"<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
'<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
"<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
);
$this->isAllGood('pi', 2, $good);
}
Expand Down Expand Up @@ -379,6 +379,8 @@ public function testTagAttributes() {
$reallyBad = array(
'<foo ="bar">' => array('foo', array('=' => NULL, '"bar"' => NULL), FALSE),
'<foo////>' => array('foo', array(), TRUE),
// character "&" in unquoted attribute shouldn't cause an infinite loop
'<foo bar=index.php?str=1&amp;id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), FALSE),
);
foreach ($reallyBad as $test => $expects) {
$events = $this->parse($test);
Expand Down