Skip to content

Commit 2ece567

Browse files
committed
Avoid code duplication for reading tag names.
1 parent 600449c commit 2ece567

File tree

2 files changed: 33 additions (+33) and 77 deletions (-77) lines changed

2 files changed: 33 additions (+33) and 77 deletions (-77) lines changed

lib/location_info/tokenizer_mixin.js

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -67,13 +67,13 @@ exports.assign = function (tokenizer) {
6767
};
6868

6969
//NOTE: patch token creation methods and attach location objects
70-
tokenizer._createStartTagToken = function (tagNameFirstCh) {
71-
tokenizerProto._createStartTagToken.call(this, tagNameFirstCh);
70+
tokenizer._createStartTagToken = function () {
71+
tokenizerProto._createStartTagToken.call(this);
7272
attachLocationInfo(this.currentToken);
7373
};
7474

75-
tokenizer._createEndTagToken = function (tagNameFirstCh) {
76-
tokenizerProto._createEndTagToken.call(this, tagNameFirstCh);
75+
tokenizer._createEndTagToken = function () {
76+
tokenizerProto._createEndTagToken.call(this);
7777
attachLocationInfo(this.currentToken);
7878
};
7979

lib/tokenizer/index.js

Lines changed: 29 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,12 @@ function isAsciiLower(cp) {
110110
return cp >= $.LATIN_SMALL_A && cp <= $.LATIN_SMALL_Z;
111111
}
112112

113+
function isAsciiLetter(cp) {
114+
return isAsciiLower(cp) || isAsciiUpper(cp);
115+
}
116+
113117
function isAsciiAlphaNumeric(cp) {
114-
return isAsciiDigit(cp) || isAsciiUpper(cp) || isAsciiLower(cp);
118+
return isAsciiLetter(cp) || isAsciiDigit(cp);
115119
}
116120

117121
function isDigit(cp, isHex) {
@@ -337,12 +341,12 @@ Tokenizer.prototype.buildEndTagToken = function (tagName) {
337341
};
338342
};
339343

340-
Tokenizer.prototype._createStartTagToken = function (tagNameFirstCh) {
341-
this.currentToken = this.buildStartTagToken(tagNameFirstCh);
344+
Tokenizer.prototype._createStartTagToken = function () {
345+
this.currentToken = this.buildStartTagToken('');
342346
};
343347

344-
Tokenizer.prototype._createEndTagToken = function (tagNameFirstCh) {
345-
this.currentToken = this.buildEndTagToken(tagNameFirstCh);
348+
Tokenizer.prototype._createEndTagToken = function () {
349+
this.currentToken = this.buildEndTagToken('');
346350
};
347351

348352
Tokenizer.prototype._createCommentToken = function () {
@@ -715,14 +719,9 @@ _[TAG_OPEN_STATE] = function tagOpenState(cp) {
715719
else if (cp === $.SOLIDUS)
716720
this.state = END_TAG_OPEN_STATE;
717721

718-
else if (isAsciiUpper(cp)) {
719-
this._createStartTagToken(toAsciiLowerChar(cp));
720-
this.state = TAG_NAME_STATE;
721-
}
722-
723-
else if (isAsciiLower(cp)) {
724-
this._createStartTagToken(toChar(cp));
725-
this.state = TAG_NAME_STATE;
722+
else if (isAsciiLetter(cp)) {
723+
this._createStartTagToken();
724+
this._reconsumeInState(TAG_NAME_STATE);
726725
}
727726

728727
else if (cp === $.QUESTION_MARK)
@@ -738,14 +737,9 @@ _[TAG_OPEN_STATE] = function tagOpenState(cp) {
738737
//12.2.4.9 End tag open state
739738
//------------------------------------------------------------------
740739
_[END_TAG_OPEN_STATE] = function endTagOpenState(cp) {
741-
if (isAsciiUpper(cp)) {
742-
this._createEndTagToken(toAsciiLowerChar(cp));
743-
this.state = TAG_NAME_STATE;
744-
}
745-
746-
else if (isAsciiLower(cp)) {
747-
this._createEndTagToken(toChar(cp));
748-
this.state = TAG_NAME_STATE;
740+
if (isAsciiLetter(cp)) {
741+
this._createEndTagToken();
742+
this._reconsumeInState(TAG_NAME_STATE);
749743
}
750744

751745
else if (cp === $.GREATER_THAN_SIGN)
@@ -808,16 +802,9 @@ _[RCDATA_LESS_THAN_SIGN_STATE] = function rcdataLessThanSignState(cp) {
808802
//12.2.4.12 RCDATA end tag open state
809803
//------------------------------------------------------------------
810804
_[RCDATA_END_TAG_OPEN_STATE] = function rcdataEndTagOpenState(cp) {
811-
if (isAsciiUpper(cp)) {
812-
this._createEndTagToken(toAsciiLowerChar(cp));
813-
this.tempBuff.push(cp);
814-
this.state = RCDATA_END_TAG_NAME_STATE;
815-
}
816-
817-
else if (isAsciiLower(cp)) {
818-
this._createEndTagToken(toChar(cp));
819-
this.tempBuff.push(cp);
820-
this.state = RCDATA_END_TAG_NAME_STATE;
805+
if (isAsciiLetter(cp)) {
806+
this._createEndTagToken();
807+
this._reconsumeInState(RCDATA_END_TAG_NAME_STATE);
821808
}
822809

823810
else {
@@ -886,16 +873,9 @@ _[RAWTEXT_LESS_THAN_SIGN_STATE] = function rawtextLessThanSignState(cp) {
886873
//12.2.4.15 RAWTEXT end tag open state
887874
//------------------------------------------------------------------
888875
_[RAWTEXT_END_TAG_OPEN_STATE] = function rawtextEndTagOpenState(cp) {
889-
if (isAsciiUpper(cp)) {
890-
this._createEndTagToken(toAsciiLowerChar(cp));
891-
this.tempBuff.push(cp);
892-
this.state = RAWTEXT_END_TAG_NAME_STATE;
893-
}
894-
895-
else if (isAsciiLower(cp)) {
896-
this._createEndTagToken(toChar(cp));
897-
this.tempBuff.push(cp);
898-
this.state = RAWTEXT_END_TAG_NAME_STATE;
876+
if (isAsciiLetter(cp)) {
877+
this._createEndTagToken();
878+
this._reconsumeInState(RAWTEXT_END_TAG_NAME_STATE);
899879
}
900880

901881
else {
@@ -970,16 +950,9 @@ _[SCRIPT_DATA_LESS_THAN_SIGN_STATE] = function scriptDataLessThanSignState(cp) {
970950
//12.2.4.18 Script data end tag open state
971951
//------------------------------------------------------------------
972952
_[SCRIPT_DATA_END_TAG_OPEN_STATE] = function scriptDataEndTagOpenState(cp) {
973-
if (isAsciiUpper(cp)) {
974-
this._createEndTagToken(toAsciiLowerChar(cp));
975-
this.tempBuff.push(cp);
976-
this.state = SCRIPT_DATA_END_TAG_NAME_STATE;
977-
}
978-
979-
else if (isAsciiLower(cp)) {
980-
this._createEndTagToken(toChar(cp));
981-
this.tempBuff.push(cp);
982-
this.state = SCRIPT_DATA_END_TAG_NAME_STATE;
953+
if (isAsciiLetter(cp)) {
954+
this._createEndTagToken();
955+
this._reconsumeInState(SCRIPT_DATA_END_TAG_NAME_STATE);
983956
}
984957

985958
else {
@@ -1141,20 +1114,10 @@ _[SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE] = function scriptDataEscapedLessThan
11411114
this.state = SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE;
11421115
}
11431116

1144-
else if (isAsciiUpper(cp)) {
1145-
this.tempBuff = [];
1146-
this.tempBuff.push(toAsciiLowerCodePoint(cp));
1147-
this.state = SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE;
1148-
this._emitChar('<');
1149-
this._emitCodePoint(cp);
1150-
}
1151-
1152-
else if (isAsciiLower(cp)) {
1117+
else if (isAsciiLetter(cp)) {
11531118
this.tempBuff = [];
1154-
this.tempBuff.push(cp);
1155-
this.state = SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE;
11561119
this._emitChar('<');
1157-
this._emitCodePoint(cp);
1120+
this._reconsumeInState(SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE);
11581121
}
11591122

11601123
else {
@@ -1167,16 +1130,9 @@ _[SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE] = function scriptDataEscapedLessThan
11671130
//12.2.4.26 Script data escaped end tag open state
11681131
//------------------------------------------------------------------
11691132
_[SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE] = function scriptDataEscapedEndTagOpenState(cp) {
1170-
if (isAsciiUpper(cp)) {
1171-
this._createEndTagToken(toAsciiLowerChar(cp));
1172-
this.tempBuff.push(cp);
1173-
this.state = SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE;
1174-
}
1175-
1176-
else if (isAsciiLower(cp)) {
1177-
this._createEndTagToken(toChar(cp));
1178-
this.tempBuff.push(cp);
1179-
this.state = SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE;
1133+
if (isAsciiLetter(cp)) {
1134+
this._createEndTagToken();
1135+
this._reconsumeInState(SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE);
11801136
}
11811137

11821138
else {

0 commit comments

Comments
 (0)