diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index b79c298..964d662 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -223,8 +223,14 @@ public function startTag($name, $attributes = array(), $selfClosing = FALSE) { $lname = Elements::normalizeSvgElement($lname); } + try { + $ele = $this->doc->createElement($lname); + } + catch(\DOMException $e) { + $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); + $ele = $this->doc->createElement('invalid'); + } - $ele = $this->doc->createElement($lname); foreach ($attributes as $aName => $aVal) { if ($this->insertMode == static::IM_IN_SVG) { diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index f21d30b..a78cf23 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -322,7 +322,9 @@ protected function tagName() { } // We know this is at least one char. - $name = strtolower($this->scanner->charsUntil("/> \n\f\t")); + $name = strtolower($this->scanner->charsWhile( + ":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + )); $attributes = array(); $selfClose = FALSE; diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 2a111bc..4ca8ed3 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -320,6 +320,93 @@ public function testSimpleTags() { } } + public function testTagsWithAttributeAndMissingName() { + $cases = array( + '' => 'id', + '' => 'color', + "" => 'class', + '' => 'bgcolor', + '' => 'class', + ); + + foreach($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', $expected, $events->get(3)); + $this->assertEventEquals('eof', NULL, $events->get(4)); + } + } + + public function 
testTagNotClosedAfterTagName() { + $cases = array( + "" => array('noscript', 'img'), + '' => array('center', 'a'), + '' => array('br', 'br'), + ); + + foreach($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected[0], $events->get(1)); + $this->assertEventEquals('startTag', $expected[1], $events->get(2)); + $this->assertEventEquals('eof', NULL, $events->get(3)); + } + + $events = $this->parse('02'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'span', $events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('text', '>02', $events->get(3)); + $this->assertEventEquals('endTag', 'span', $events->get(4)); + $this->assertEventEquals('eof', NULL, $events->get(5)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'p', $events->get(1)); + $this->assertEventEquals('endTag', 'p', $events->get(2)); + $this->assertEventEquals('eof', NULL, $events->get(3)); + + $events = $this->parse(''); + $this->assertEventEquals('startTag', 'strong', $events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventEquals('startTag', 'wordpress', $events->get(2)); + $this->assertEventEquals('endTag', 'strong', $events->get(3)); + $this->assertEventEquals('eof', NULL, $events->get(4)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', 'src', $events->get(3)); + $this->assertEventEquals('startTag', 'a', $events->get(4)); + $this->assertEventEquals('eof', NULL, $events->get(5)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'br', $events->get(1)); + $this->assertEventEquals('eof', NULL, $events->get(2)); + } + + public 
function testIllegalTagNames() { + $cases = array( + '' => 'li', + '' => 'p', + '' => 'b', + '' => 'static', + '' => 'h', + '' => 'st', + '' => 'a', + ); + + foreach($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected, $events->get(1)); + } + } + /** * @depends testCharacterReference */