forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Job completed in 19 minutes 22 seconds (remote runner delay: 1 minute 38 seconds).
dotnet/runtime#124881
Using arguments: regexdiff
Main commit: dotnet/runtime@133c7bd
PR commit: danmoseley/runtime@5aa5d8b
396 out of 18857 patterns have generated source code changes.
Examples of GeneratedRegex source diffs
"[^'\",]+'[^^']+'|[^'\",]+\"[^\"]+\"|[^,]+" (21563 uses)
[GeneratedRegex("[^'\",]+'[^^']+'|[^'\",]+\"[^\"]+\"|[^,]+")]
/// <code>[^'",]+'[^^']+'|[^'",]+"[^"]+"|[^,]+</code><br/>
/// Explanation:<br/>
/// <code>
- /// ○ Match with 3 alternative expressions, atomically.<br/>
+ /// ○ Match with 2 alternative expressions, atomically.<br/>
/// ○ Match a sequence of expressions.<br/>
/// ○ Match a character in the set [^"',] atomically at least once.<br/>
- /// ○ Match '\''.<br/>
- /// ○ Match a character in the set [^'^] atomically at least once.<br/>
- /// ○ Match '\''.<br/>
- /// ○ Match a sequence of expressions.<br/>
- /// ○ Match a character in the set [^"',] atomically at least once.<br/>
- /// ○ Match '"'.<br/>
- /// ○ Match a character other than '"' atomically at least once.<br/>
- /// ○ Match '"'.<br/>
+ /// ○ Match with 2 alternative expressions, atomically.<br/>
+ /// ○ Match a sequence of expressions.<br/>
+ /// ○ Match '\''.<br/>
+ /// ○ Match a character in the set [^'^] atomically at least once.<br/>
+ /// ○ Match '\''.<br/>
+ /// ○ Match a sequence of expressions.<br/>
+ /// ○ Match '"'.<br/>
+ /// ○ Match a character other than '"' atomically at least once.<br/>
+ /// ○ Match '"'.<br/>
/// ○ Match a character other than ',' atomically at least once.<br/>
/// </code>
/// </remarks>
int matchStart = pos;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
- // Match with 3 alternative expressions, atomically.
+ // Match with 2 alternative expressions, atomically.
{
int alternation_starting_pos = pos;
pos += iteration;
}
- // Match '\''.
- if (slice.IsEmpty || slice[0] != '\'')
+ // Match with 2 alternative expressions, atomically.
{
- goto AlternationBranch;
- }
-
- // Match a character in the set [^'^] atomically at least once.
- {
- int iteration1 = slice.Slice(1).IndexOfAny('\'', '^');
- if (iteration1 < 0)
- {
- iteration1 = slice.Length - 1;
- }
-
- if (iteration1 == 0)
+ if (slice.IsEmpty)
{
goto AlternationBranch;
}
- slice = slice.Slice(iteration1);
- pos += iteration1;
+ switch (slice[0])
+ {
+ case '\'':
+
+ // Match a character in the set [^'^] atomically at least once.
+ {
+ int iteration1 = slice.Slice(1).IndexOfAny('\'', '^');
+ if (iteration1 < 0)
+ {
+ iteration1 = slice.Length - 1;
+ }
+
+ if (iteration1 == 0)
+ {
+ goto AlternationBranch;
+ }
+
+ slice = slice.Slice(iteration1);
+ pos += iteration1;
+ }
+
+ // Match '\''.
+ if ((uint)slice.Length < 2 || slice[1] != '\'')
+ {
+ goto AlternationBranch;
+ }
+
+ pos += 2;
+ slice = inputSpan.Slice(pos);
+ break;
+
+ case '"':
+
+ // Match a character other than '"' atomically at least once.
+ {
+ int iteration2 = slice.Slice(1).IndexOf('"');
+ if (iteration2 < 0)
+ {
+ iteration2 = slice.Length - 1;
+ }
+
+ if (iteration2 == 0)
+ {
+ goto AlternationBranch;
+ }
+
+ slice = slice.Slice(iteration2);
+ pos += iteration2;
+ }
+
+ // Match '"'.
+ if ((uint)slice.Length < 2 || slice[1] != '"')
+ {
+ goto AlternationBranch;
+ }
+
+ pos += 2;
+ slice = inputSpan.Slice(pos);
+ break;
+
+ default:
+ goto AlternationBranch;
+ }
}
- // Match '\''.
- if ((uint)slice.Length < 2 || slice[1] != '\'')
- {
- goto AlternationBranch;
- }
-
- pos += 2;
- slice = inputSpan.Slice(pos);
goto AlternationMatch;
AlternationBranch:
// Branch 1
{
- // Match a character in the set [^"',] atomically at least once.
+ // Match a character other than ',' atomically at least once.
{
- int iteration2 = slice.IndexOfAny('"', '\'', ',');
- if (iteration2 < 0)
- {
- iteration2 = slice.Length;
- }
-
- if (iteration2 == 0)
- {
- goto AlternationBranch1;
- }
-
- slice = slice.Slice(iteration2);
- pos += iteration2;
- }
-
- // Match '"'.
- if (slice.IsEmpty || slice[0] != '"')
- {
- goto AlternationBranch1;
- }
-
- // Match a character other than '"' atomically at least once.
- {
- int iteration3 = slice.Slice(1).IndexOf('"');
+ int iteration3 = slice.IndexOf(',');
if (iteration3 < 0)
{
- iteration3 = slice.Length - 1;
+ iteration3 = slice.Length;
}
if (iteration3 == 0)
{
- goto AlternationBranch1;
+ return false; // The input didn't match.
}
slice = slice.Slice(iteration3);
pos += iteration3;
}
- // Match '"'.
- if ((uint)slice.Length < 2 || slice[1] != '"')
- {
- goto AlternationBranch1;
- }
-
- pos += 2;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch;
-
- AlternationBranch1:
- pos = alternation_starting_pos;
- slice = inputSpan.Slice(pos);
- }
-
- // Branch 2
- {
- // Match a character other than ',' atomically at least once.
- {
- int iteration4 = slice.IndexOf(',');
- if (iteration4 < 0)
- {
- iteration4 = slice.Length;
- }
-
- if (iteration4 == 0)
- {
- return false; // The input didn't match.
- }
-
- slice = slice.Slice(iteration4);
- pos += iteration4;
- }
-
}
AlternationMatch:;
"^(http|https)\\://[a-zA-Z0-9\\-\\.]+(:[a-zA- ..." (821 uses)
[GeneratedRegex("^(http|https)\\://[a-zA-Z0-9\\-\\.]+(:[a-zA-Z0-9]*)?(/[a-zA-Z0-9\\-\\._]*)*$", RegexOptions.IgnoreCase)]
/// ○ 1st capture group.<br/>
/// ○ Match a character in the set [Hh].<br/>
/// ○ Match a character in the set [Tt] exactly 2 times.<br/>
- /// ○ Match with 2 alternative expressions.<br/>
- /// ○ Match a character in the set [Pp].<br/>
- /// ○ Match a sequence of expressions.<br/>
- /// ○ Match a character in the set [Pp].<br/>
- /// ○ Match a character in the set [Ss].<br/>
+ /// ○ Match a character in the set [Pp].<br/>
+ /// ○ Match a character in the set [Ss] atomically, optionally.<br/>
/// ○ Match the string "://".<br/>
/// ○ Match a character in the set [\-.0-9A-Za-z\u212A] greedily at least once.<br/>
/// ○ Optional (greedy).<br/>
{
int pos = base.runtextpos;
int matchStart = pos;
- int alternation_branch = 0;
- int alternation_starting_capturepos = 0;
- int alternation_starting_pos = 0;
int capture_starting_pos = 0;
int charloop_capture_pos = 0;
int charloop_starting_pos = 0, charloop_ending_pos = 0;
}
// 1st capture group.
- //{
+ {
capture_starting_pos = pos;
- if ((uint)slice.Length < 3 ||
- !slice.StartsWith("htt", StringComparison.OrdinalIgnoreCase)) // Match the string "htt" (ordinal case-insensitive)
+ if ((uint)slice.Length < 4 ||
+ !slice.StartsWith("http", StringComparison.OrdinalIgnoreCase)) // Match the string "http" (ordinal case-insensitive)
{
UncaptureUntil(0);
return false; // The input didn't match.
}
- // Match with 2 alternative expressions.
- //{
- alternation_starting_pos = pos;
- alternation_starting_capturepos = base.Crawlpos();
-
- // Branch 0
- //{
- // Match a character in the set [Pp].
- if ((uint)slice.Length < 4 || ((slice[3] | 0x20) != 'p'))
- {
- goto AlternationBranch;
- }
-
- alternation_branch = 0;
- pos += 4;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch;
-
- AlternationBranch:
- pos = alternation_starting_pos;
- slice = inputSpan.Slice(pos);
- UncaptureUntil(alternation_starting_capturepos);
- //}
-
- // Branch 1
- //{
- if ((uint)slice.Length < 5 ||
- !slice.Slice(3).StartsWith("ps", StringComparison.OrdinalIgnoreCase)) // Match the string "ps" (ordinal case-insensitive)
- {
- UncaptureUntil(0);
- return false; // The input didn't match.
- }
-
- alternation_branch = 1;
- pos += 5;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch;
- //}
-
- AlternationBacktrack:
- if (Utilities.s_hasTimeout)
+ // Match a character in the set [Ss] atomically, optionally.
+ {
+ if ((uint)slice.Length > (uint)4 && ((slice[4] | 0x20) == 's'))
{
- base.CheckTimeout();
+ slice = slice.Slice(1);
+ pos++;
}
-
- switch (alternation_branch)
- {
- case 0:
- goto AlternationBranch;
- case 1:
- UncaptureUntil(0);
- return false; // The input didn't match.
- }
-
- AlternationMatch:;
- //}
+ }
+ pos += 4;
+ slice = inputSpan.Slice(pos);
base.Capture(1, capture_starting_pos, pos);
-
- goto CaptureSkipBacktrack;
-
- CaptureBacktrack:
- goto AlternationBacktrack;
-
- CaptureSkipBacktrack:;
- //}
+ }
// Match the string "://".
if (!slice.StartsWith("://"))
{
- goto CaptureBacktrack;
+ UncaptureUntil(0);
+ return false; // The input didn't match.
}
// Match a character in the set [\-.0-9A-Za-z\u212A] greedily at least once.
if (iteration == 0)
{
- goto CaptureBacktrack;
+ UncaptureUntil(0);
+ return false; // The input didn't match.
}
slice = slice.Slice(iteration);
if (charloop_starting_pos >= charloop_ending_pos)
{
- goto CaptureBacktrack;
+ UncaptureUntil(0);
+ return false; // The input didn't match.
}
pos = --charloop_ending_pos;
slice = inputSpan.Slice(pos);
base.Capture(2, capture_starting_pos1, pos);
Utilities.StackPush(ref base.runstack!, ref stackpos, capture_starting_pos1);
- goto CaptureSkipBacktrack1;
+ goto CaptureSkipBacktrack;
- CaptureBacktrack1:
+ CaptureBacktrack:
capture_starting_pos1 = base.runstack![--stackpos];
goto CharLoopBacktrack1;
- CaptureSkipBacktrack1:;
+ CaptureSkipBacktrack:;
//}
// No iterations of the loop remain to backtrack into. Fail the loop.
goto CharLoopBacktrack;
}
- goto CaptureBacktrack1;
+ goto CaptureBacktrack;
LoopEnd:;
//}
"^[\\s\\S]+?(?=[\\\\<!\\[*`~\\:]|\\b_|\\bhttp ..." (774 uses)
[GeneratedRegex("^[\\s\\S]+?(?=[\\\\<!\\[*`~\\:]|\\b_|\\bhttps?:\\/\\/| {2,}\\n|$)")]
/// ○ Match if at the beginning of the string.<br/>
/// ○ Match any character lazily at least once.<br/>
/// ○ Zero-width positive lookahead.<br/>
- /// ○ Match with 5 alternative expressions, atomically.<br/>
+ /// ○ Match with 4 alternative expressions, atomically.<br/>
/// ○ Match a character in the set [!*:<[\\`~].<br/>
/// ○ Match a sequence of expressions.<br/>
/// ○ Match if at a word boundary.<br/>
- /// ○ Match '_'.<br/>
- /// ○ Match a sequence of expressions.<br/>
- /// ○ Match if at a word boundary.<br/>
- /// ○ Match the string "http".<br/>
- /// ○ Match 's' atomically, optionally.<br/>
- /// ○ Match the string "://".<br/>
+ /// ○ Match with 2 alternative expressions, atomically.<br/>
+ /// ○ Match '_'.<br/>
+ /// ○ Match a sequence of expressions.<br/>
+ /// ○ Match the string "http".<br/>
+ /// ○ Match 's' atomically, optionally.<br/>
+ /// ○ Match the string "://".<br/>
/// ○ Match a sequence of expressions.<br/>
/// ○ Match ' ' atomically at least twice.<br/>
/// ○ Match '\n'.<br/>
int atomic_stackpos = stackpos;
- // Match with 5 alternative expressions, atomically.
+ // Match with 4 alternative expressions, atomically.
{
int alternation_starting_pos = pos;
goto AlternationBranch1;
}
- // Match '_'.
- if (slice.IsEmpty || slice[0] != '_')
+ // Match with 2 alternative expressions, atomically.
{
- goto AlternationBranch1;
+ if (slice.IsEmpty)
+ {
+ goto AlternationBranch1;
+ }
+
+ switch (slice[0])
+ {
+ case '_':
+ pos++;
+ slice = inputSpan.Slice(pos);
+ break;
+
+ case 'h':
+ // Match the string "ttp".
+ if (!slice.Slice(1).StartsWith("ttp"))
+ {
+ goto AlternationBranch1;
+ }
+
+ // Match 's' atomically, optionally.
+ {
+ if ((uint)slice.Length > (uint)4 && slice[4] == 's')
+ {
+ slice = slice.Slice(1);
+ pos++;
+ }
+ }
+
+ // Match the string "://".
+ if (!slice.Slice(4).StartsWith("://"))
+ {
+ goto AlternationBranch1;
+ }
+
+ pos += 7;
+ slice = inputSpan.Slice(pos);
+ break;
+
+ default:
+ goto AlternationBranch1;
+ }
}
- pos++;
- slice = inputSpan.Slice(pos);
goto AlternationMatch;
AlternationBranch1:
}
// Branch 2
- {
- // Match if at a word boundary.
- if (!Utilities.IsPreWordCharBoundary(inputSpan, pos))
- {
- goto AlternationBranch2;
- }
-
- // Match the string "http".
- if (!slice.StartsWith("http"))
- {
- goto AlternationBranch2;
- }
-
- // Match 's' atomically, optionally.
- {
- if ((uint)slice.Length > (uint)4 && slice[4] == 's')
- {
- slice = slice.Slice(1);
- pos++;
- }
- }
-
- // Match the string "://".
- if (!slice.Slice(4).StartsWith("://"))
- {
- goto AlternationBranch2;
- }
-
- pos += 7;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch;
-
- AlternationBranch2:
- pos = alternation_starting_pos;
- slice = inputSpan.Slice(pos);
- }
-
- // Branch 3
{
// Match ' ' atomically at least twice.
{
if (iteration < 2)
{
- goto AlternationBranch3;
+ goto AlternationBranch2;
}
slice = slice.Slice(iteration);
// Match '\n'.
if (slice.IsEmpty || slice[0] != '\n')
{
- goto AlternationBranch3;
+ goto AlternationBranch2;
}
pos++;
slice = inputSpan.Slice(pos);
goto AlternationMatch;
- AlternationBranch3:
+ AlternationBranch2:
pos = alternation_starting_pos;
slice = inputSpan.Slice(pos);
}
- // Branch 4
+ // Branch 3
{
// Match if at the end of the string or if before an ending newline.
if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
"\\G(?:[\"“”]|\\s|\\\\[@#*]|\\\\[@#*bfmv])" (33 uses)
[GeneratedRegex("\\G(?:[\"“”]|\\s|\\\\[@#*]|\\\\[@#*bfmv])")]
/// Explanation:<br/>
/// <code>
/// ○ Match if at the start position.<br/>
- /// ○ Match with 3 alternative expressions, atomically.<br/>
+ /// ○ Match with 2 alternative expressions, atomically.<br/>
/// ○ Match a character in the set ["\u201C\u201D\s].<br/>
/// ○ Match a sequence of expressions.<br/>
/// ○ Match '\\'.<br/>
- /// ○ Match a character in the set [#*@].<br/>
- /// ○ Match a sequence of expressions.<br/>
- /// ○ Match '\\'.<br/>
/// ○ Match a character in the set [#*@bfmv].<br/>
/// </code>
/// </remarks>
return false; // The input didn't match.
}
- // Match with 3 alternative expressions, atomically.
+ // Match with 2 alternative expressions, atomically.
{
int alternation_starting_pos = pos;
}
// Branch 1
- {
- if ((uint)slice.Length < 2 ||
- slice[0] != '\\' || // Match '\\'.
- (((ch = slice[1]) != '#') & (ch != '*') & (ch != '@'))) // Match a character in the set [#*@].
- {
- goto AlternationBranch1;
- }
-
- pos += 2;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch;
-
- AlternationBranch1:
- pos = alternation_starting_pos;
- slice = inputSpan.Slice(pos);
- }
-
- // Branch 2
{
if ((uint)slice.Length < 2 ||
slice[0] != '\\' || // Match '\\'.
For more diff examples, see https://gist.github.com/MihuBot/8b933494bb1466554d325dc4ca9fc8d4
JIT assembly changes
Total bytes of base: 54284087
Total bytes of diff: 54418439
Total bytes of delta: 134352 (0.25 % of base)
Total relative delta: 50.97
diff is a regression.
relative diff is a regression.
For a list of JIT diff regressions, see Regressions.md
For a list of JIT diff improvements, see Improvements.md
Sample source code for further analysis
// Local cache file for the regex diff results; the archive is downloaded only when this file is absent.
const string JsonPath = "RegexResults-1798.json";

if (!File.Exists(JsonPath))
{
    // Own the HttpClient with a using declaration so its handler and connection are released
    // once the archive has been extracted, instead of leaking an undisposed instance.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/FH1OjmHA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First(...) throws InvalidOperationException when the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields is enabled so that public fields (not only properties) take part in deserialization.
// Deserialize returns null for a JSON "null" document; fail with a clear message rather than
// letting a NullReferenceException surface on the first use of the array.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidOperationException($"'{JsonPath}' deserialized to null; expected a JSON array of regex entries.");

Console.WriteLine($"Working with {entries.Length} patterns");
/// <summary>
/// A regular expression pattern together with the options it is constructed with.
/// </summary>
/// <param name="Pattern">The regular expression pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> associated with the pattern.</param>
/// <param name="Count">A count attached to the pattern; presumably the number of known uses of it (TODO confirm against the producer of the JSON).</param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);
/// <summary>
/// One element of the deserialized results array: a known pattern plus the source and diff
/// text recorded for it.
/// </summary>
sealed class RegexEntry
{
/// <summary>The pattern, options and count this entry describes. Must be set on initialization.</summary>
public required KnownPattern Regex { get; set; }
/// <summary>Source text for the baseline; presumably the generated regex code from the main commit (TODO confirm). Must be set on initialization.</summary>
public required string MainSource { get; set; }
/// <summary>Source text for the change under test; presumably the generated regex code from the PR commit (TODO confirm). Must be set on initialization.</summary>
public required string PrSource { get; set; }
/// <summary>Optional diff text; presumably the complete diff between the two sources (TODO confirm). May be null.</summary>
public string? FullDiff { get; set; }
/// <summary>Optional diff text; presumably an abbreviated form of the full diff (TODO confirm). May be null.</summary>
public string? ShortDiff { get; set; }
/// <summary>Optional array of (Name, Values) string pairs. May be null.</summary>
// NOTE(review): the tuple elements are fields rather than properties, which is likely why
// callers deserialize with IncludeFields enabled — confirm.
public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
/// <summary>Optional array pairing a set of string values with the <see cref="StringComparison"/> applied to them. May be null.</summary>
public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}
Artifacts:
- ShortExampleDiffs.md (29 KB)
- LongExampleDiffs.md (902 KB)
- Results.zip (45 MB)
- jit-diffs.zip (391 MB)
- JitAnalyzeSummary.txt (70 KB)
- JitDiffRegressions.md (894 KB)
- LongJitDiffRegressions.md (47 MB)
- JitDiffImprovements.md (899 KB)
- LongJitDiffImprovements.md (28 MB)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels