Skip to content

Commit

Permalink
Merge pull request #64 from goetas/i63
Browse files Browse the repository at this point in the history
Case insensitive tags
  • Loading branch information
goetas committed Dec 17, 2014
2 parents de3a3af + 4df8acd commit 398ebb6
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 4 deletions.
10 changes: 6 additions & 4 deletions src/HTML5/Parser/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,9 @@ protected function rcdata()
$sequence = '</' . $this->untilTag . '>';
$txt = '';
$tok = $this->scanner->current();
while ($tok !== false && ! ($tok == '<' && ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))))) {

$caseSensitive = !Elements::isHtml5Element($this->untilTag);
while ($tok !== false && ! ($tok == '<' && ($this->sequenceMatches($sequence, $caseSensitive)))) {
if ($tok == '&') {
$txt .= $this->decodeCharacterReference();
$tok = $this->scanner->current();
Expand Down Expand Up @@ -891,7 +893,7 @@ protected function readUntilSequence($sequence)
$buffer .= $this->scanner->charsUntil($first);

// Stop as soon as we hit the stopping condition.
if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) {
if ($this->sequenceMatches($sequence, false)) {
return $buffer;
}
$buffer .= $this->scanner->current();
Expand All @@ -916,7 +918,7 @@ protected function readUntilSequence($sequence)
* see if the input stream is at the start of a
* '</script>' string.
*/
protected function sequenceMatches($sequence)
protected function sequenceMatches($sequence, $caseSensitive = true)
{
$len = strlen($sequence);
$buffer = '';
Expand All @@ -932,7 +934,7 @@ protected function sequenceMatches($sequence)
}

$this->scanner->unconsume($len);
return $buffer == $sequence;
return $caseSensitive ? $buffer == $sequence : strcasecmp($buffer, $sequence) === 0;
}

/**
Expand Down
22 changes: 22 additions & 0 deletions test/HTML5/Parser/DOMTreeBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@ public function testDocument()
$this->assertInstanceOf('\DOMDocument', $doc);
$this->assertEquals('html', $doc->documentElement->tagName);
}

/**
 * Parses a document whose title element is written with mixed-case
 * opening and closing tags (<Title> ... </TitlE>) and checks that the
 * text of both the title and the script element can be read back
 * through XPath in the XHTML namespace.
 */
public function testStrangeCapitalization()
{
    $markup = "<!doctype html>
<html>
<head>
<Title>Hello, world!</TitlE>
</head>
<body>TheBody<script>foo</script></body>
</html>";
    $document = $this->parse($markup);

    $this->assertInstanceOf('\DOMDocument', $document);
    $this->assertEquals('html', $document->documentElement->tagName);

    // The queries below address elements through the "x" prefix, which
    // is bound here to the XHTML namespace URI.
    $finder = new \DOMXPath($document);
    $finder->registerNamespace("x", "http://www.w3.org/1999/xhtml");

    // XPath expression => expected text content of the first match,
    // checked in the listed order.
    $expectations = array(
        "//x:title" => "Hello, world!",
        "//x:script" => "foo",
    );
    foreach ($expectations as $expression => $expectedText) {
        $firstMatch = $finder->query($expression)->item(0);
        $this->assertEquals($expectedText, $firstMatch->nodeValue);
    }
}

public function testDocumentFakeAttrAbsence()
{
$html = "<!DOCTYPE html><html xmlns=\"http://www.w3.org/1999/xhtml\"><body>foo</body></html>";
Expand Down

0 comments on commit 398ebb6

Please sign in to comment.