Skip to content

Commit 19bf015

Browse files
committed
Update scanner handling of numeric literal suffixes (Fixes #3281)
The scanner was missing logic to error out in some cases when we have forbidden characters after a numeric literal. The spec states that we cannot have an identifier start or a digit immediately following a numeric literal. - Changed Scanner::FScanNumber to check for forbidden characters following a valid literal. - Added an error message to convey the identifier after literal case. - Removed the unused oFScanNumber function. - Created a scanner test dir since we don't have one. - Added a comprehensive test to test both the following literal and digit case (for octal and binary literals.)
1 parent 7055b37 commit 19bf015

File tree

7 files changed

+142
-116
lines changed

7 files changed

+142
-116
lines changed

lib/Parser/Scan.cpp

Lines changed: 45 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -565,9 +565,25 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
565565
{
566566
EncodedCharPtr last = m_pchLast;
567567
EncodedCharPtr pchT = nullptr;
568+
bool baseSpecified = false;
568569
likelyInt = true;
569570
// Reset
570571
m_OctOrLeadingZeroOnLastTKNumber = false;
572+
573+
auto baseSpecifierCheck = [&pchT, &pdbl, p, &baseSpecified]()
574+
{
575+
if (pchT == p + 2)
576+
{
577+
// An octal token '0' was followed by a base specifier: /0[xXoObB]/
578+
// This literal can no longer be a double
579+
*pdbl = 0;
580+
// Advance the character pointer to the base specifier
581+
pchT = p + 1;
582+
// Set the flag so we know to offset the potential identifier search after the literal
583+
baseSpecified = true;
584+
}
585+
};
586+
571587
if ('0' == this->PeekFirst(p, last))
572588
{
573589
switch(this->PeekFirst(p + 1, last))
@@ -583,37 +599,21 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
583599
case 'X':
584600
// Hex
585601
*pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT);
586-
if (pchT == p + 2)
587-
{
588-
// "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
589-
*pdbl = 0;
590-
return p + 1;
591-
}
592-
else
593-
return pchT;
602+
baseSpecifierCheck();
603+
goto LIdCheck;
594604
case 'o':
595605
case 'O':
596606
// Octal
597607
*pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT);
598-
if (pchT == p + 2)
599-
{
600-
// "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
601-
*pdbl = 0;
602-
return p + 1;
603-
}
604-
return pchT;
608+
baseSpecifierCheck();
609+
goto LIdCheck;
605610

606611
case 'b':
607612
case 'B':
608613
// Binary
609614
*pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT);
610-
if (pchT == p + 2)
611-
{
612-
// "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
613-
*pdbl = 0;
614-
return p + 1;
615-
}
616-
return pchT;
615+
baseSpecifierCheck();
616+
goto LIdCheck;
617617

618618
default:
619619
// Octal
@@ -636,113 +636,45 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
636636
m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
637637
goto LFloat;
638638
}
639-
return pchT;
639+
goto LIdCheck;
640640
}
641641
}
642642
else
643643
{
644644
LFloat:
645645
*pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyInt);
646646
Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
647-
return pchT;
647+
// fall through to LIdCheck
648648
}
649-
}
650649

651-
template <typename EncodingPolicy>
652-
BOOL Scanner<EncodingPolicy>::oFScanNumber(double *pdbl, bool& likelyInt)
653-
{
654-
EncodedCharPtr pchT;
655-
m_OctOrLeadingZeroOnLastTKNumber = false;
656-
likelyInt = true;
657-
if ('0' == *m_currentCharacter)
650+
LIdCheck:
651+
// https://tc39.github.io/ecma262/#sec-literals-numeric-literals
652+
// The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
653+
// For example : 3in is an error and not the two input elements 3 and in
654+
codepoint_t outChar = 0;
655+
// If a base was speficied, use the first character denoting the constant. In this case, pchT is pointing to the base specifier.
656+
EncodedCharPtr startingLocation = baseSpecified ? pchT + 1 : pchT;
657+
if (this->charClassifier->IsIdStart(*startingLocation))
658658
{
659-
switch (m_currentCharacter[1])
660-
{
661-
case '.':
662-
case 'e':
663-
case 'E':
664-
likelyInt = false;
665-
// Floating point.
666-
goto LFloat;
667-
668-
case 'x':
669-
case 'X':
670-
// Hex.
671-
*pdbl = Js::NumberUtilities::DblFromHex<EncodedChar>(m_currentCharacter + 2, &pchT);
672-
if (pchT == m_currentCharacter + 2)
673-
{
674-
// "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
675-
*pdbl = 0;
676-
m_currentCharacter++;
677-
}
678-
else
679-
m_currentCharacter = pchT;
680-
break;
681-
case 'o':
682-
case 'O':
683-
*pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter + 2, &pchT);
684-
if (pchT == m_currentCharacter + 2)
685-
{
686-
// "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
687-
*pdbl = 0;
688-
m_currentCharacter++;
689-
}
690-
else
691-
m_currentCharacter = pchT;
692-
break;
693-
694-
case 'b':
695-
case 'B':
696-
*pdbl = Js::NumberUtilities::DblFromBinary(m_currentCharacter + 2, &pchT);
697-
if (pchT == m_currentCharacter + 2)
698-
{
699-
// "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
700-
*pdbl = 0;
701-
m_currentCharacter++;
702-
}
703-
else
704-
m_currentCharacter = pchT;
705-
break;
706-
707-
default:
708-
// Octal.
709-
*pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter, &pchT);
710-
Assert(pchT > m_currentCharacter);
711-
712-
713-
#if !SOURCERELEASE
714-
// If an octal literal is malformed then it is in fact a decimal literal.
715-
#endif // !SOURCERELEASE
716-
if(*pdbl != 0 || pchT > m_currentCharacter + 1)
717-
m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
718-
switch (*pchT)
719-
{
720-
case '8':
721-
case '9':
722-
// case 'e':
723-
// case 'E':
724-
// case '.':
725-
m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
726-
goto LFloat;
727-
}
659+
Error(ERRIdAfterLit);
660+
}
728661

729-
m_currentCharacter = pchT;
730-
break;
662+
// IsIdStart does not cover the unicode escape case. Try to read a unicode escape from the 'u' char.
663+
if (*pchT == '\\')
664+
{
665+
startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.
666+
if (TryReadEscape(startingLocation, m_pchLast, &outChar))
667+
{
668+
Error(ERRIdAfterLit);
731669
}
732670
}
733-
else
734-
{
735-
LFloat:
736-
// Let StrToDbl do all the work.
737671

738-
*pdbl = Js::NumberUtilities::StrToDbl(m_currentCharacter, &pchT, likelyInt);
739-
if (pchT == m_currentCharacter)
740-
return FALSE;
741-
m_currentCharacter = pchT;
742-
Assert(!Js::NumberUtilities::IsNan(*pdbl));
672+
if (Js::NumberUtilities::IsDigit(*startingLocation))
673+
{
674+
Error(ERRbadNumber);
743675
}
744676

745-
return TRUE;
677+
return pchT;
746678
}
747679

748680
template <typename EncodingPolicy>

lib/Parser/Scan.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,6 @@ class Scanner : public IScanner, public EncodingPolicy
767767
tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
768768
tokens ScanRegExpConstant(ArenaAllocator* alloc);
769769
tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
770-
BOOL oFScanNumber(double *pdbl, bool& likelyInt);
771770
EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt);
772771
IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
773772
IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);

lib/Parser/perrors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ LSC_ERROR_MSG( 1013, ERRbadNumber , "Invalid number")
2222
LSC_ERROR_MSG( 1014, ERRillegalChar , "Invalid character")
2323
LSC_ERROR_MSG( 1015, ERRnoStrEnd , "Unterminated string constant")
2424
LSC_ERROR_MSG( 1016, ERRnoCmtEnd , "Unterminated comment")
25+
LSC_ERROR_MSG( 1017, ERRIdAfterLit , "Unexpected identifier after numeric literal")
2526

2627
LSC_ERROR_MSG( 1018, ERRbadReturn , "'return' statement outside of function")
2728
LSC_ERROR_MSG( 1019, ERRbadBreak , "Can't have 'break' outside of loop")

test/Scanner/NumericLiteralSuffix.js

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//-------------------------------------------------------------------------------------------------------
2+
// Copyright (C) Microsoft. All rights reserved.
3+
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
4+
//-------------------------------------------------------------------------------------------------------
5+
6+
WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
7+
8+
// https://tc39.github.io/ecma262/#sec-reserved-words
9+
let keywords = ['await', 'break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'export', 'extends', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'return', 'super', 'switch', 'this', 'throw', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield'];
10+
let futureReservedWords = ['enum', 'implements', 'package', 'protected ', 'interface', 'private', 'public'];
11+
12+
// https://tc39.github.io/ecma262/#sec-names-and-keywords
13+
let idStarts = ["\u{50}", '$', '_', "\\u{50}"];
14+
15+
// https://tc39.github.io/ecma262/#sec-literals-numeric-literals
16+
let literalClasses = {
17+
'Decimal Integer Literal': [
18+
'0', '1', '123',
19+
'0.1', '1.1', '123.1', '123.123',
20+
'0e1', '1e1', '1e+1', '1e-1',
21+
'0E1', '1E1', '1E+1', '1E-1',
22+
'123e123', '123e+123', '123e-123',
23+
'123E123', '123E+123', '123E-123'
24+
],
25+
'Binary Integer Literal': [
26+
'0b0', '0b1', '0b010101',
27+
'0B0', '0B1', '0B010101',
28+
],
29+
'Octal Integer Literal': [
30+
'0o0', '0o1', '0o123',
31+
'0O0', '0O1', '0O123'
32+
],
33+
'Hex Integer Literal': [
34+
'0x0', '0x1', '0x123', '0xabc', '0xABC', '0x123abc', '0x123ABC',
35+
'0X0', '0X1', '0X123', '0Xabc', '0XABC', '0X123abc', '0X123ABC'
36+
]
37+
};
38+
39+
var tests = [
40+
{
41+
name: "Numeric literal followed by an identifier start throws",
42+
body: function () {
43+
for (let literalClass in literalClasses) {
44+
for (let literal of literalClasses[literalClass]) {
45+
for (let idStart of idStarts) {
46+
for (let keyword of keywords) {
47+
assert.throws(function () { eval(`${literal}${keyword}`); }, SyntaxError, `Keyword '${keyword}' directly after ${literalClass} '${literal}' throws`, "Unexpected identifier after numeric literal");
48+
}
49+
for (let futureReservedWord of futureReservedWords) {
50+
assert.throws(function () { eval(`${literal}${futureReservedWord}`); }, SyntaxError, `Future reserved word '${futureReservedWord}' directly after ${literalClass} '${literal}' throws`, "Unexpected identifier after numeric literal");
51+
}
52+
for (let idStart of idStarts) {
53+
assert.throws(function () { eval(`${literal}${idStart}`); }, SyntaxError, `Identifier start '${idStart}' directly after ${literalClass} '${literal}' throws`, "Unexpected identifier after numeric literal");
54+
}
55+
}
56+
}
57+
}
58+
}
59+
},
60+
{
61+
name: "Numeric literal followed by invalid digit throws",
62+
body: function () {
63+
let nonOctalDigits = ['8', '9'];
64+
for (let literal of literalClasses['Octal Integer Literal']) {
65+
for (let nonOctalDigit of nonOctalDigits) {
66+
assert.throws(function () { eval(`${literal}${nonOctalDigit}`); }, SyntaxError, `Non-octal digit '${nonOctalDigit}' directly after Octal Integer Literal '${literal}' throws`, "Invalid number");
67+
}
68+
}
69+
70+
let nonBinaryDigits = ['2', '3', '4', '5', '6', '7', '8', '9'];
71+
for (let literal of literalClasses['Binary Integer Literal']) {
72+
for (let nonBinaryDigit of nonBinaryDigits) {
73+
assert.throws(function () { eval(`${literal}${nonBinaryDigit}`); }, SyntaxError, `Non-binary digit '${nonBinaryDigit}' directly after Binary Integer Literal '${literal}' throws`, "Invalid number");
74+
}
75+
}
76+
}
77+
}
78+
];
79+
80+
testRunner.runTests(tests, { verbose: WScript.Arguments[0] != "summary" });

test/Scanner/rlexe.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<regress-exe>
3+
<test>
4+
<default>
5+
<files>NumericLiteralSuffix.js</files>
6+
<compile-flags>-args summary -endargs</compile-flags>
7+
</default>
8+
</test>
9+
</regress-exe>

test/es6/bug_OS12095746.baseline

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
NotifyModuleReadyCallback(exception) bug_OS12095746_mod0.js
22
NotifyModuleReadyCallback(exception) bug_OS12095746_mod1.js
33
mod0 catch:Syntax error
4-
mod1 catch:Expected ';'
4+
mod1 catch:Unexpected identifier after numeric literal
55
NotifyModuleReadyCallback(exception) bug_OS12095746_mod2.js
6-
mod2 catch:Expected ';'
6+
mod2 catch:Unexpected identifier after numeric literal

test/rlexedirs.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,4 +321,9 @@
321321
<tags>sequential,exclude_dynapogo,exclude_jshost,exclude_snap,exclude_serialized,require_debugger</tags>
322322
</default>
323323
</dir>
324+
<dir>
325+
<default>
326+
<files>Scanner</files>
327+
</default>
328+
</dir>
324329
</regress-exe>

0 commit comments

Comments
 (0)