From 89e1a7a540719f7ed42629d590eb33a2c0da0e50 Mon Sep 17 00:00:00 2001
From: Christophe Coevoet
Date: Sat, 24 Nov 2018 13:07:15 +0100
Subject: [PATCH] Optimize the token comparison

Tokens are always a single char. Using strspn to find whether they
belong to a fixed list is slower than comparing them directly.
---
 src/HTML5/Parser/Tokenizer.php | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 6284733..62c39f1 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -47,8 +47,6 @@ class Tokenizer
     const CONFORMANT_HTML = 'html';
     protected $mode = self::CONFORMANT_HTML;
 
-    const WHITE = "\t\n\f ";
-
     /**
      * Create a new tokenizer.
      *
@@ -159,7 +157,7 @@ protected function consumeData()
                 break;
 
             default:
-                if (!strspn($tok, '<&')) {
+                if ('<' !== $tok && '&' !== $tok) {
                     // NULL character
                     if ("\00" === $tok) {
                         $this->parseError('Received null character.');
@@ -193,7 +191,7 @@ protected function characterData()
             case Elements::TEXT_RCDATA:
                 return $this->rcdata($tok);
             default:
-                if (strspn($tok, '<&')) {
+                if ('<' === $tok || '&' === $tok) {
                     return false;
                 }
 
@@ -1093,7 +1091,7 @@ protected function decodeCharacterReference($inAttribute = false)
 
         // These indicate not an entity. We return just
         // the &.
-        if (1 === strspn($tok, static::WHITE . '&<')) {
+        if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
             // $this->scanner->next();
             return '&';
         }