Skip to content

Commit

Permalink
Merge pull request #31 from miso-belica/fix-invalid-tag-names
Browse files Browse the repository at this point in the history
Don't throw an exception for invalid tag names
  • Loading branch information
mattfarina committed Apr 29, 2014
2 parents 1f2861b + 4401688 commit 56c6eac
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 2 deletions.
8 changes: 7 additions & 1 deletion src/HTML5/Parser/DOMTreeBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,14 @@ public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
$lname = Elements::normalizeSvgElement($lname);
}

try {
$ele = $this->doc->createElement($lname);
}
catch(\DOMException $e) {
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
$ele = $this->doc->createElement('invalid');
}

$ele = $this->doc->createElement($lname);
foreach ($attributes as $aName => $aVal) {

if ($this->insertMode == static::IM_IN_SVG) {
Expand Down
4 changes: 3 additions & 1 deletion src/HTML5/Parser/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,9 @@ protected function tagName() {
}

// We know this is at least one char.
$name = strtolower($this->scanner->charsUntil("/> \n\f\t"));
$name = strtolower($this->scanner->charsWhile(
":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
));
$attributes = array();
$selfClose = FALSE;

Expand Down
87 changes: 87 additions & 0 deletions test/HTML5/Parser/TokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,93 @@ public function testSimpleTags() {
}
}

/**
 * Tags that consist of an attribute-like `name="value"` pair with no
 * separate tag name (e.g. `<id="top_featured">`).
 *
 * For each input the event stream is expected to hold three error events,
 * then a startTag event named after the text preceding the `=`, then eof.
 */
public function testTagsWithAttributeAndMissingName() {
  $expectedNames = array(
    '<id="top_featured">' => 'id',
    '<color="white">' => 'color',
    "<class='neaktivni_stranka'>" => 'class',
    '<bgcolor="white">' => 'bgcolor',
    '<class="nom">' => 'class',
  );

  foreach ($expectedNames as $markup => $tagName) {
    $stream = $this->parse($markup);

    // Events 0, 1 and 2 must all be errors, checked in order.
    for ($position = 0; $position < 3; $position++) {
      $this->assertEventError($stream->get($position));
    }

    $this->assertEventEquals('startTag', $tagName, $stream->get(3));
    $this->assertEventEquals('eof', NULL, $stream->get(4));
  }
}

/**
 * Tags whose name is followed directly by something other than `>`
 * (another `<`, `</`, `=<`, or `...`) instead of being closed.
 *
 * Each scenario pins the exact sequence of error, startTag, endTag, text
 * and eof events expected from the input.
 */
public function testTagNotClosedAfterTagName() {
  // Two start tags run together: one error, then both start tags, then eof.
  $runTogether = array(
    "<noscript<img>" => array('noscript', 'img'),
    '<center<a>' => array('center', 'a'),
    '<br<br>' => array('br', 'br'),
  );

  foreach ($runTogether as $markup => $tagNames) {
    list($firstTag, $secondTag) = $tagNames;

    $stream = $this->parse($markup);
    $this->assertEventError($stream->get(0));
    $this->assertEventEquals('startTag', $firstTag, $stream->get(1));
    $this->assertEventEquals('startTag', $secondTag, $stream->get(2));
    $this->assertEventEquals('eof', NULL, $stream->get(3));
  }

  // An unclosed <span followed by a bare "<": the ">02" ends up as text.
  $spanStream = $this->parse('<span<>02</span>');
  $this->assertEventError($spanStream->get(0));
  $this->assertEventEquals('startTag', 'span', $spanStream->get(1));
  $this->assertEventError($spanStream->get(2));
  $this->assertEventEquals('text', '>02', $spanStream->get(3));
  $this->assertEventEquals('endTag', 'span', $spanStream->get(4));
  $this->assertEventEquals('eof', NULL, $spanStream->get(5));

  // An unclosed start tag immediately followed by its end tag.
  $paragraphStream = $this->parse('<p</p>');
  $this->assertEventError($paragraphStream->get(0));
  $this->assertEventEquals('startTag', 'p', $paragraphStream->get(1));
  $this->assertEventEquals('endTag', 'p', $paragraphStream->get(2));
  $this->assertEventEquals('eof', NULL, $paragraphStream->get(3));

  // An unclosed mixed-case tag inside a valid element; the name is reported lowercased.
  $strongStream = $this->parse('<strong><WordPress</strong>');
  $this->assertEventEquals('startTag', 'strong', $strongStream->get(0));
  $this->assertEventError($strongStream->get(1));
  $this->assertEventEquals('startTag', 'wordpress', $strongStream->get(2));
  $this->assertEventEquals('endTag', 'strong', $strongStream->get(3));
  $this->assertEventEquals('eof', NULL, $strongStream->get(4));

  // A name followed by "=" and then a new tag: three errors precede the start tags.
  $srcStream = $this->parse('<src=<a>');
  $this->assertEventError($srcStream->get(0));
  $this->assertEventError($srcStream->get(1));
  $this->assertEventError($srcStream->get(2));
  $this->assertEventEquals('startTag', 'src', $srcStream->get(3));
  $this->assertEventEquals('startTag', 'a', $srcStream->get(4));
  $this->assertEventEquals('eof', NULL, $srcStream->get(5));

  // A name followed by dots: only the "br" start tag is expected before eof.
  $dottedStream = $this->parse('<br...<a>');
  $this->assertEventError($dottedStream->get(0));
  $this->assertEventEquals('startTag', 'br', $dottedStream->get(1));
  $this->assertEventEquals('eof', NULL, $dottedStream->get(2));
}

/**
 * Tag names that contain a character outside the accepted set
 * (a quote, `&`, `*`, or `-` in these samples).
 *
 * For each input the first event must be an error and the second a
 * startTag whose name is the leading run of characters before the
 * offending one. Later events are not inspected.
 */
public function testIllegalTagNames() {
  $truncatedNames = array(
    '<li">' => 'li',
    '<p">' => 'p',
    '<b&nbsp; >' => 'b',
    '<static*all>' => 'static',
    '<h*0720/>' => 'h',
    '<st*ATTRIBUTE />' => 'st',
    '<a-href="http://url.com/">' => 'a',
  );

  foreach ($truncatedNames as $markup => $tagName) {
    $stream = $this->parse($markup);

    $leadingEvent = $stream->get(0);
    $this->assertEventError($leadingEvent);

    $tagEvent = $stream->get(1);
    $this->assertEventEquals('startTag', $tagName, $tagEvent);
  }
}

/**
* @depends testCharacterReference
*/
Expand Down

0 comments on commit 56c6eac

Please sign in to comment.