Skip to content

Commit 4a7231a

Browse files
committed
Implement RegExp dotAll flag
1 parent 45a6271 commit 4a7231a

28 files changed

+332
-32
lines changed

lib/Common/ConfigFlagsList.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ PHASE(All)
640640
#define DEFAULT_CONFIG_ES6UnicodeVerbose (true)
641641
#define DEFAULT_CONFIG_ES6Unscopables (true)
642642
#define DEFAULT_CONFIG_ES6RegExSticky (true)
643+
#define DEFAULT_CONFIG_ES2018RegExDotAll (true)
643644
#ifdef COMPILE_DISABLE_ES6RegExPrototypeProperties
644645
// If ES6RegExPrototypeProperties needs to be disabled by compile flag, DEFAULT_CONFIG_ES6RegExPrototypeProperties should be false
645646
#define DEFAULT_CONFIG_ES6RegExPrototypeProperties (false)
@@ -1135,6 +1136,7 @@ FLAGPR (Boolean, ES6, ES6Unicode , "Enable ES6 Unicode 6.0
11351136
FLAGPR (Boolean, ES6, ES6UnicodeVerbose , "Enable ES6 Unicode 6.0 verbose failure output" , DEFAULT_CONFIG_ES6UnicodeVerbose)
11361137
FLAGPR (Boolean, ES6, ES6Unscopables , "Enable ES6 With Statement Unscopables" , DEFAULT_CONFIG_ES6Unscopables)
11371138
FLAGPR (Boolean, ES6, ES6RegExSticky , "Enable ES6 RegEx sticky flag" , DEFAULT_CONFIG_ES6RegExSticky)
1139+
FLAGPR (Boolean, ES6, ES2018RegExDotAll , "Enable ES2018 RegEx dotAll flag" , DEFAULT_CONFIG_ES2018RegExDotAll)
11381140

11391141
#ifndef COMPILE_DISABLE_ES6RegExPrototypeProperties
11401142
#define COMPILE_DISABLE_ES6RegExPrototypeProperties 0

lib/Parser/RegexFlags.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace UnifiedRegex
1414
MultilineRegexFlag = 1 << 2,
1515
UnicodeRegexFlag = 1 << 3,
1616
StickyRegexFlag = 1 << 4,
17-
AllRegexFlags = (1 << 5) - 1
17+
DotAllRegexFlag = 1 << 5,
18+
AllRegexFlags = (1 << 6) - 1
1819
};
1920
}

lib/Parser/RegexParser.cpp

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ namespace UnifiedRegex
145145
, tempLocationOfRange(nullptr)
146146
, codePointAtTempLocation(0)
147147
, unicodeFlagPresent(false)
148+
, dotAllFlagPresent(false)
148149
, caseInsensitiveFlagPresent(false)
149150
, positionAfterLastSurrogate(nullptr)
150151
, valueOfLastSurrogate(INVALID_CODEPOINT)
@@ -2758,6 +2759,16 @@ namespace UnifiedRegex
27582759
}
27592760
flags = (RegexFlags)(flags | MultilineRegexFlag);
27602761
break;
2762+
case 's':
2763+
if (scriptContext->GetConfig()->IsES2018RegExDotAllEnabled())
2764+
{
2765+
if ((flags & DotAllRegexFlag) != 0)
2766+
{
2767+
Fail(JSERR_RegExpSyntax);
2768+
}
2769+
flags = (RegexFlags)(flags | DotAllRegexFlag);
2770+
break;
2771+
}
27612772
case 'u':
27622773
// If we don't have unicode enabled, fall through to default
27632774
if (scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled())
@@ -2832,12 +2843,15 @@ namespace UnifiedRegex
28322843
Fail(JSERR_RegExpSyntax);
28332844
this->unicodeFlagPresent = (flags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
28342845
this->caseInsensitiveFlagPresent = (flags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2846+
this->dotAllFlagPresent = (flags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
28352847
Assert(!this->unicodeFlagPresent || scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
2848+
Assert(!this->dotAllFlagPresent || scriptContext->GetConfig()->IsES2018RegExDotAllEnabled());
28362849
}
28372850
else
28382851
{
28392852
this->unicodeFlagPresent = false;
28402853
this->caseInsensitiveFlagPresent = false;
2854+
this->dotAllFlagPresent = false;
28412855
}
28422856

28432857
// If this HR has been set, that means we have an earlier failure than the one caught above.
@@ -2891,6 +2905,7 @@ namespace UnifiedRegex
28912905
Options(flags);
28922906
this->unicodeFlagPresent = (flags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
28932907
this->caseInsensitiveFlagPresent = (flags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2908+
this->dotAllFlagPresent = (flags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
28942909
Assert(!this->unicodeFlagPresent || scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
28952910

28962911
// If this HR has been set, that means we have an earlier failure than the one caught above.
@@ -2946,6 +2961,7 @@ namespace UnifiedRegex
29462961
Options(dummyFlags);
29472962
this->unicodeFlagPresent = (dummyFlags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
29482963
this->caseInsensitiveFlagPresent = (dummyFlags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2964+
this->dotAllFlagPresent = (dummyFlags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
29492965
outTotalEncodedChars = Chars<EncodedChar>::OSB(next, input);
29502966
outTotalChars = Pos();
29512967

@@ -3101,7 +3117,14 @@ namespace UnifiedRegex
31013117
switch (cc)
31023118
{
31033119
case '.':
3104-
standardChars->SetNonNewline(ctAllocator, partialPrefixSetNode->set);
3120+
if (this->dotAllFlagPresent)
3121+
{
3122+
standardChars->SetFullSet(ctAllocator, partialPrefixSetNode->set);
3123+
}
3124+
else
3125+
{
3126+
standardChars->SetNonNewline(ctAllocator, partialPrefixSetNode->set);
3127+
}
31053128
break;
31063129
case 'S':
31073130
standardChars->SetNonWhitespace(ctAllocator, partialPrefixSetNode->set);
@@ -3137,7 +3160,14 @@ namespace UnifiedRegex
31373160
switch (cc)
31383161
{
31393162
case '.':
3140-
standardChars->SetNonNewline(ctAllocator, setNode->set);
3163+
if (this->dotAllFlagPresent)
3164+
{
3165+
standardChars->SetFullSet(ctAllocator, setNode->set);
3166+
}
3167+
else
3168+
{
3169+
standardChars->SetNonNewline(ctAllocator, setNode->set);
3170+
}
31413171
break;
31423172
case 'S':
31433173
standardChars->SetNonWhitespace(ctAllocator, setNode->set);

lib/Parser/RegexParser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ namespace UnifiedRegex
107107
SurrogatePairTracker* currentSurrogatePairNode;
108108
bool unicodeFlagPresent;
109109
bool caseInsensitiveFlagPresent;
110+
bool dotAllFlagPresent;
110111

111112
// The following two variables are used to determine if the surrogate pair has been encountered
112113
// First holds the temporary location, second holds the value of the codepoint

lib/Parser/RegexPattern.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ namespace UnifiedRegex
9393
return (rep.unified.program->flags & IgnoreCaseRegexFlag) != 0;
9494
}
9595

96+
bool RegexPattern::IsDotAll() const
97+
{
98+
return GetScriptContext()->GetConfig()->IsES2018RegExDotAllEnabled() && (rep.unified.program->flags & DotAllRegexFlag) != 0;
99+
}
100+
96101
bool RegexPattern::IsGlobal() const
97102
{
98103
return (rep.unified.program->flags & GlobalRegexFlag) != 0;
@@ -195,6 +200,8 @@ namespace UnifiedRegex
195200
w->Print(_u("g"));
196201
if (IsMultiline())
197202
w->Print(_u("m"));
203+
if (IsDotAll())
204+
w->Print(_u("s"));
198205
if (IsUnicode())
199206
w->Print(_u("u"));
200207
if (IsSticky())

lib/Parser/RegexPattern.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ namespace UnifiedRegex
6262
bool IsIgnoreCase() const;
6363
bool IsGlobal() const;
6464
bool IsMultiline() const;
65+
bool IsDotAll() const;
6566
bool IsUnicode() const;
6667
bool IsSticky() const;
6768
bool WasLastMatchSuccessful() const;

lib/Parser/RegexRuntime.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5755,9 +5755,10 @@ namespace UnifiedRegex
57555755
w->Print(_u("flags: "));
57565756
if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
57575757
if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));
5758-
if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase"));
5759-
if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode"));
5760-
if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky"));
5758+
if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase "));
5759+
if ((flags & DotAllRegexFlag) != 0) w->Print(_u("dotAll "));
5760+
if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode "));
5761+
if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky "));
57615762
w->EOL();
57625763
w->PrintEOL(_u("numGroups: %d"), numGroups);
57635764
w->PrintEOL(_u("numLoops: %d"), numLoops);

lib/Parser/StandardChars.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,11 @@ END {
261261
set.SetNotRanges(setAllocator, numNewlinePairs, newlineStr);
262262
}
263263

264+
// Makes `set` the complement of an empty range list, i.e. a set that excludes nothing.
// The parser uses this for '.' when the dotAll flag is present.
//
// setAllocator: arena the set's storage is taken from.
// set:          character set to fill in.
//
// The storage must come from the caller-supplied setAllocator, exactly as SetNonNewline
// does; passing a different allocator would ignore the parameter and tie the set's
// memory to an arena the caller did not choose.
void StandardChars<char16>::SetFullSet(ArenaAllocator* setAllocator, CharSet<Char> &set)
{
    set.SetNotRanges(setAllocator, 0, nullptr);
}
268+
264269
CharSet<char16>* StandardChars<char16>::GetFullSet()
265270
{
266271
if (fullSet == 0)

lib/Parser/StandardChars.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ namespace UnifiedRegex
323323
void SetNonWordIUChars(ArenaAllocator* setAllocator, CharSet<Char> &set);
324324
void SetNewline(ArenaAllocator* setAllocator, CharSet<Char> &set);
325325
void SetNonNewline(ArenaAllocator* setAllocator, CharSet<Char> &set);
326+
void SetFullSet(ArenaAllocator* setAllocator, CharSet<Char> &set);
326327

327328
CharSet<Char>* GetFullSet();
328329
CharSet<Char>* GetEmptySet();

lib/Runtime/Base/JnDirectFields.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ ENTRY(compile)
422422
ENTRY(global)
423423
ENTRY(lastIndex)
424424
ENTRY(multiline)
425+
ENTRY(dotAll)
425426
ENTRY(ignoreCase)
426427
ENTRY(unicode)
427428
ENTRY(sticky)

lib/Runtime/Base/ThreadConfigFlagsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ FLAG_RELEASE(IsES6ToStringTagEnabled, ES6ToStringTag)
3838
FLAG_RELEASE(IsES6UnicodeExtensionsEnabled, ES6Unicode)
3939
FLAG_RELEASE(IsES6UnscopablesEnabled, ES6Unscopables)
4040
FLAG_RELEASE(IsES6RegExStickyEnabled, ES6RegExSticky)
41+
FLAG_RELEASE(IsES2018RegExDotAllEnabled, ES2018RegExDotAll)
4142
FLAG_RELEASE(IsES6RegExPrototypePropertiesEnabled, ES6RegExPrototypeProperties)
4243
FLAG_RELEASE(IsES6RegExSymbolsEnabled, ES6RegExSymbols)
4344
FLAG_RELEASE(IsES6HasInstanceEnabled, ES6HasInstance)

lib/Runtime/Library/JavascriptBuiltInFunctionList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ BUILTIN(JavascriptRegExp, GetterOptions, EntryGetterOptions, FunctionInfo::Error
219219
BUILTIN(JavascriptRegExp, GetterSource, EntryGetterSource, FunctionInfo::ErrorOnNew | FunctionInfo::HasNoSideEffect)
220220
BUILTIN(JavascriptRegExp, GetterSticky, EntryGetterSticky, FunctionInfo::ErrorOnNew | FunctionInfo::HasNoSideEffect)
221221
BUILTIN(JavascriptRegExp, GetterUnicode, EntryGetterUnicode, FunctionInfo::ErrorOnNew | FunctionInfo::HasNoSideEffect)
222+
BUILTIN(JavascriptRegExp, GetterDotAll, EntryGetterDotAll, FunctionInfo::ErrorOnNew | FunctionInfo::HasNoSideEffect)
222223
BUILTIN(JavascriptString, NewInstance, NewInstance, FunctionInfo::SkipDefaultNewObject)
223224
BUILTIN(JavascriptString, CharAt, EntryCharAt, FunctionInfo::ErrorOnNew)
224225
BUILTIN(JavascriptString, CharCodeAt, EntryCharCodeAt, FunctionInfo::ErrorOnNew)

lib/Runtime/Library/JavascriptLibrary.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3747,7 +3747,7 @@ namespace Js
37473747

37483748
bool JavascriptLibrary::InitializeRegexPrototype(DynamicObject* regexPrototype, DeferredTypeHandlerBase * typeHandler, DeferredInitializeMode mode)
37493749
{
3750-
typeHandler->Convert(regexPrototype, mode, 24);
3750+
typeHandler->Convert(regexPrototype, mode, 26);
37513751
// Note: Any new function addition/deletion/modification should also be updated in JavascriptLibrary::ProfilerRegisterRegExp
37523752
// so that the update is in sync with profiler
37533753
JavascriptFunction * func;
@@ -3800,9 +3800,18 @@ namespace Js
38003800
{
38013801
library->regexUnicodeGetterFunction =
38023802
library->AddGetterToLibraryObject(regexPrototype, PropertyIds::unicode, &JavascriptRegExp::EntryInfo::GetterUnicode);
3803-
library->regexUnicodeGetterSlotIndex = 19;
3803+
library->regexUnicodeGetterSlotIndex = scriptConfig->IsES6RegExStickyEnabled() ? 19 : 17;
38043804
Assert(regexPrototype->GetSlot(library->regexUnicodeGetterSlotIndex) == library->regexUnicodeGetterFunction);
38053805
}
3806+
3807+
if (scriptConfig->IsES2018RegExDotAllEnabled())
3808+
{
3809+
library->regexDotAllGetterFunction =
3810+
library->AddGetterToLibraryObject(regexPrototype, PropertyIds::dotAll, &JavascriptRegExp::EntryInfo::GetterDotAll);
3811+
library->regexDotAllGetterSlotIndex = 21 -
3812+
(scriptConfig->IsES6UnicodeExtensionsEnabled() ? 0 : 2) - (scriptConfig->IsES6RegExStickyEnabled() ? 0 : 2);
3813+
Assert(regexPrototype->GetSlot(library->regexDotAllGetterSlotIndex) == library->regexDotAllGetterFunction);
3814+
}
38063815
}
38073816

38083817
if (scriptConfig->IsES6RegExSymbolsEnabled())

lib/Runtime/Library/JavascriptLibrary.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ namespace Js
425425
Field(JavascriptFunction*) regexFlagsGetterFunction;
426426
Field(JavascriptFunction*) regexGlobalGetterFunction;
427427
Field(JavascriptFunction*) regexStickyGetterFunction;
428+
Field(JavascriptFunction*) regexDotAllGetterFunction;
428429
Field(JavascriptFunction*) regexUnicodeGetterFunction;
429430

430431
Field(RuntimeFunction*) sharedArrayBufferConstructor;
@@ -452,6 +453,7 @@ namespace Js
452453
Field(int) regexFlagsGetterSlotIndex;
453454
Field(int) regexGlobalGetterSlotIndex;
454455
Field(int) regexStickyGetterSlotIndex;
456+
Field(int) regexDotAllGetterSlotIndex;
455457
Field(int) regexUnicodeGetterSlotIndex;
456458

457459
mutable Field(CharStringCache) charStringCache;
@@ -776,13 +778,15 @@ namespace Js
776778
JavascriptFunction* GetRegexFlagsGetterFunction() const { return regexFlagsGetterFunction; }
777779
JavascriptFunction* GetRegexGlobalGetterFunction() const { return regexGlobalGetterFunction; }
778780
JavascriptFunction* GetRegexStickyGetterFunction() const { return regexStickyGetterFunction; }
781+
// Returns the function stored in regexDotAllGetterFunction, which InitializeRegexPrototype assigns from the PropertyIds::dotAll getter it adds to the regex prototype when ES2018RegExDotAll is enabled.
JavascriptFunction* GetRegexDotAllGetterFunction() const { return regexDotAllGetterFunction; }
779782
JavascriptFunction* GetRegexUnicodeGetterFunction() const { return regexUnicodeGetterFunction; }
780783

781784
int GetRegexConstructorSlotIndex() const { return regexConstructorSlotIndex; }
782785
int GetRegexExecSlotIndex() const { return regexExecSlotIndex; }
783786
int GetRegexFlagsGetterSlotIndex() const { return regexFlagsGetterSlotIndex; }
784787
int GetRegexGlobalGetterSlotIndex() const { return regexGlobalGetterSlotIndex; }
785788
int GetRegexStickyGetterSlotIndex() const { return regexStickyGetterSlotIndex; }
789+
// Returns the regex prototype slot index recorded for the dotAll getter function; InitializeRegexPrototype sets it only when ES2018RegExDotAll is enabled.
int GetRegexDotAllGetterSlotIndex() const { return regexDotAllGetterSlotIndex; }
786790
int GetRegexUnicodeGetterSlotIndex() const { return regexUnicodeGetterSlotIndex; }
787791

788792
TypePath* GetRootPath() const { return rootPath; }

0 commit comments

Comments
 (0)