From 89e1a7a540719f7ed42629d590eb33a2c0da0e50 Mon Sep 17 00:00:00 2001
From: Christophe Coevoet <stof@notk.org>
Date: Sat, 24 Nov 2018 13:07:15 +0100
Subject: [PATCH] Optimize the token comparison

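Tokens are always a single character. Calling strspn() to check whether a
token belongs to a fixed list is slower than comparing it directly with
strict equality. The WHITE constant is dropped, and its use in
decodeCharacterReference() is replaced by explicit comparisons.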
---
 src/HTML5/Parser/Tokenizer.php | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 6284733..62c39f1 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -47,8 +47,6 @@ class Tokenizer
     const CONFORMANT_HTML = 'html';
     protected $mode = self::CONFORMANT_HTML;
 
-    const WHITE = "\t\n\f ";
-
     /**
      * Create a new tokenizer.
      *
@@ -159,7 +157,7 @@ protected function consumeData()
                     break;
 
                 default:
-                    if (!strspn($tok, '<&')) {
+                    if ('<' !== $tok && '&' !== $tok) {
                         // NULL character
                         if ("\00" === $tok) {
                             $this->parseError('Received null character.');
@@ -193,7 +191,7 @@ protected function characterData()
             case Elements::TEXT_RCDATA:
                 return $this->rcdata($tok);
             default:
-                if (strspn($tok, '<&')) {
+                if ('<' === $tok || '&' === $tok) {
                     return false;
                 }
 
@@ -1093,7 +1091,7 @@ protected function decodeCharacterReference($inAttribute = false)
 
         // These indicate not an entity. We return just
         // the &.
-        if (1 === strspn($tok, static::WHITE . '&<')) {
+        if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
             // $this->scanner->next();
             return '&';
         }