diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 7b0b3f3..4aeeb50 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -121,11 +121,16 @@ public function setTextMode($textmode, $untilTag = null) */ protected function consumeData() { - // Character reference - $this->characterReference(); - $tok = $this->scanner->current(); + if ($tok === '&') { + // Character reference + $ref = $this->decodeCharacterReference(); + $this->buffer($ref); + + $tok = $this->scanner->current(); + } + // Parse tag if ($tok === '<') { // Any buffered text data can go out now. @@ -303,25 +308,6 @@ protected function eof($tok) return false; } - /** - * Handle character references (aka entities). - * - * This version is specific to PCDATA, as it buffers data into the - * text buffer. For a generic version, see decodeCharacterReference(). - * - * HTML5 8.2.4.2 - */ - protected function characterReference() - { - if ($this->scanner->current() !== '&') { - return false; - } - - $ref = $this->decodeCharacterReference(); - $this->buffer($ref); - return true; - } - /** * Look for markup. */