@@ -110,8 +110,12 @@ function isAsciiLower(cp) {
110110 return cp >= $ . LATIN_SMALL_A && cp <= $ . LATIN_SMALL_Z ;
111111}
112112
113+ function isAsciiLetter ( cp ) {
114+ return isAsciiLower ( cp ) || isAsciiUpper ( cp ) ;
115+ }
116+
113117function isAsciiAlphaNumeric ( cp ) {
114- return isAsciiDigit ( cp ) || isAsciiUpper ( cp ) || isAsciiLower ( cp ) ;
118+ return isAsciiLetter ( cp ) || isAsciiDigit ( cp ) ;
115119}
116120
117121function isDigit ( cp , isHex ) {
@@ -337,12 +341,12 @@ Tokenizer.prototype.buildEndTagToken = function (tagName) {
337341 } ;
338342} ;
339343
340- Tokenizer . prototype . _createStartTagToken = function ( tagNameFirstCh ) {
341- this . currentToken = this . buildStartTagToken ( tagNameFirstCh ) ;
344+ Tokenizer . prototype . _createStartTagToken = function ( ) {
345+ this . currentToken = this . buildStartTagToken ( '' ) ;
342346} ;
343347
344- Tokenizer . prototype . _createEndTagToken = function ( tagNameFirstCh ) {
345- this . currentToken = this . buildEndTagToken ( tagNameFirstCh ) ;
348+ Tokenizer . prototype . _createEndTagToken = function ( ) {
349+ this . currentToken = this . buildEndTagToken ( '' ) ;
346350} ;
347351
348352Tokenizer . prototype . _createCommentToken = function ( ) {
@@ -715,14 +719,9 @@ _[TAG_OPEN_STATE] = function tagOpenState(cp) {
715719 else if ( cp === $ . SOLIDUS )
716720 this . state = END_TAG_OPEN_STATE ;
717721
718- else if ( isAsciiUpper ( cp ) ) {
719- this . _createStartTagToken ( toAsciiLowerChar ( cp ) ) ;
720- this . state = TAG_NAME_STATE ;
721- }
722-
723- else if ( isAsciiLower ( cp ) ) {
724- this . _createStartTagToken ( toChar ( cp ) ) ;
725- this . state = TAG_NAME_STATE ;
722+ else if ( isAsciiLetter ( cp ) ) {
723+ this . _createStartTagToken ( ) ;
724+ this . _reconsumeInState ( TAG_NAME_STATE ) ;
726725 }
727726
728727 else if ( cp === $ . QUESTION_MARK )
@@ -738,14 +737,9 @@ _[TAG_OPEN_STATE] = function tagOpenState(cp) {
738737//12.2.4.9 End tag open state
739738//------------------------------------------------------------------
740739_ [ END_TAG_OPEN_STATE ] = function endTagOpenState ( cp ) {
741- if ( isAsciiUpper ( cp ) ) {
742- this . _createEndTagToken ( toAsciiLowerChar ( cp ) ) ;
743- this . state = TAG_NAME_STATE ;
744- }
745-
746- else if ( isAsciiLower ( cp ) ) {
747- this . _createEndTagToken ( toChar ( cp ) ) ;
748- this . state = TAG_NAME_STATE ;
740+ if ( isAsciiLetter ( cp ) ) {
741+ this . _createEndTagToken ( ) ;
742+ this . _reconsumeInState ( TAG_NAME_STATE ) ;
749743 }
750744
751745 else if ( cp === $ . GREATER_THAN_SIGN )
@@ -808,16 +802,9 @@ _[RCDATA_LESS_THAN_SIGN_STATE] = function rcdataLessThanSignState(cp) {
808802//12.2.4.12 RCDATA end tag open state
809803//------------------------------------------------------------------
810804_ [ RCDATA_END_TAG_OPEN_STATE ] = function rcdataEndTagOpenState ( cp ) {
811- if ( isAsciiUpper ( cp ) ) {
812- this . _createEndTagToken ( toAsciiLowerChar ( cp ) ) ;
813- this . tempBuff . push ( cp ) ;
814- this . state = RCDATA_END_TAG_NAME_STATE ;
815- }
816-
817- else if ( isAsciiLower ( cp ) ) {
818- this . _createEndTagToken ( toChar ( cp ) ) ;
819- this . tempBuff . push ( cp ) ;
820- this . state = RCDATA_END_TAG_NAME_STATE ;
805+ if ( isAsciiLetter ( cp ) ) {
806+ this . _createEndTagToken ( ) ;
807+ this . _reconsumeInState ( RCDATA_END_TAG_NAME_STATE ) ;
821808 }
822809
823810 else {
@@ -886,16 +873,9 @@ _[RAWTEXT_LESS_THAN_SIGN_STATE] = function rawtextLessThanSignState(cp) {
886873//12.2.4.15 RAWTEXT end tag open state
887874//------------------------------------------------------------------
888875_ [ RAWTEXT_END_TAG_OPEN_STATE ] = function rawtextEndTagOpenState ( cp ) {
889- if ( isAsciiUpper ( cp ) ) {
890- this . _createEndTagToken ( toAsciiLowerChar ( cp ) ) ;
891- this . tempBuff . push ( cp ) ;
892- this . state = RAWTEXT_END_TAG_NAME_STATE ;
893- }
894-
895- else if ( isAsciiLower ( cp ) ) {
896- this . _createEndTagToken ( toChar ( cp ) ) ;
897- this . tempBuff . push ( cp ) ;
898- this . state = RAWTEXT_END_TAG_NAME_STATE ;
876+ if ( isAsciiLetter ( cp ) ) {
877+ this . _createEndTagToken ( ) ;
878+ this . _reconsumeInState ( RAWTEXT_END_TAG_NAME_STATE ) ;
899879 }
900880
901881 else {
@@ -970,16 +950,9 @@ _[SCRIPT_DATA_LESS_THAN_SIGN_STATE] = function scriptDataLessThanSignState(cp) {
970950//12.2.4.18 Script data end tag open state
971951//------------------------------------------------------------------
972952_ [ SCRIPT_DATA_END_TAG_OPEN_STATE ] = function scriptDataEndTagOpenState ( cp ) {
973- if ( isAsciiUpper ( cp ) ) {
974- this . _createEndTagToken ( toAsciiLowerChar ( cp ) ) ;
975- this . tempBuff . push ( cp ) ;
976- this . state = SCRIPT_DATA_END_TAG_NAME_STATE ;
977- }
978-
979- else if ( isAsciiLower ( cp ) ) {
980- this . _createEndTagToken ( toChar ( cp ) ) ;
981- this . tempBuff . push ( cp ) ;
982- this . state = SCRIPT_DATA_END_TAG_NAME_STATE ;
953+ if ( isAsciiLetter ( cp ) ) {
954+ this . _createEndTagToken ( ) ;
955+ this . _reconsumeInState ( SCRIPT_DATA_END_TAG_NAME_STATE ) ;
983956 }
984957
985958 else {
@@ -1141,20 +1114,10 @@ _[SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE] = function scriptDataEscapedLessThan
11411114 this . state = SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE ;
11421115 }
11431116
1144- else if ( isAsciiUpper ( cp ) ) {
1145- this . tempBuff = [ ] ;
1146- this . tempBuff . push ( toAsciiLowerCodePoint ( cp ) ) ;
1147- this . state = SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE ;
1148- this . _emitChar ( '<' ) ;
1149- this . _emitCodePoint ( cp ) ;
1150- }
1151-
1152- else if ( isAsciiLower ( cp ) ) {
1117+ else if ( isAsciiLetter ( cp ) ) {
11531118 this . tempBuff = [ ] ;
1154- this . tempBuff . push ( cp ) ;
1155- this . state = SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE ;
11561119 this . _emitChar ( '<' ) ;
1157- this . _emitCodePoint ( cp ) ;
1120+ this . _reconsumeInState ( SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE ) ;
11581121 }
11591122
11601123 else {
@@ -1167,16 +1130,9 @@ _[SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE] = function scriptDataEscapedLessThan
11671130//12.2.4.26 Script data escaped end tag open state
11681131//------------------------------------------------------------------
11691132_ [ SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE ] = function scriptDataEscapedEndTagOpenState ( cp ) {
1170- if ( isAsciiUpper ( cp ) ) {
1171- this . _createEndTagToken ( toAsciiLowerChar ( cp ) ) ;
1172- this . tempBuff . push ( cp ) ;
1173- this . state = SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE ;
1174- }
1175-
1176- else if ( isAsciiLower ( cp ) ) {
1177- this . _createEndTagToken ( toChar ( cp ) ) ;
1178- this . tempBuff . push ( cp ) ;
1179- this . state = SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE ;
1133+ if ( isAsciiLetter ( cp ) ) {
1134+ this . _createEndTagToken ( ) ;
1135+ this . _reconsumeInState ( SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE ) ;
11801136 }
11811137
11821138 else {
0 commit comments