Skip to content

[RegexDiff X64] [danmoseley] Remove redundant Atomic wrapper and fix shared- ... #1833

@MihuBot

Description

@MihuBot

Job completed in 14 minutes 2 seconds (remote runner delay: 1 minute 7 seconds).
dotnet/runtime#126114
Using arguments: regexdiff
Main commit: dotnet/runtime@44cd131
PR commit: danmoseley/runtime@8d88d04

460 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"^\\s*\\[(([^#;]|\\\\#|\\\\;)+)\\]\\s*([#;].*)?$" (3059 uses)
[GeneratedRegex("^\\s*\\[(([^#;]|\\\\#|\\\\;)+)\\]\\s*([#;].*)?$")]
  /// ○ 1st capture group.<br/>
  ///     ○ Loop greedily at least once.<br/>
  ///         ○ 2nd capture group.<br/>
-   ///             ○ Match with 3 alternative expressions.<br/>
+   ///             ○ Match with 2 alternative expressions.<br/>
  ///                 ○ Match a character in the set [^#;].<br/>
-   ///                 ○ Match the string "\\#".<br/>
-   ///                 ○ Match the string "\\;".<br/>
+   ///                 ○ Match a sequence of expressions.<br/>
+   ///                     ○ Match '\\'.<br/>
+   ///                     ○ Match a character in the set [#;].<br/>
  /// ○ Match ']'.<br/>
  /// ○ Match a whitespace character greedily any number of times.<br/>
  /// ○ Optional (greedy).<br/>
                          //{
                              int capture_starting_pos1 = pos;
                              
-                               // Match with 3 alternative expressions.
+                               // Match with 2 alternative expressions.
                              //{
                                  int alternation_starting_pos = pos;
                                  int alternation_starting_capturepos = base.Crawlpos();
                                  
                                  // Branch 1
                                  //{
-                                       // Match the string "\\#".
-                                       if (!slice.StartsWith("\\#"))
-                                       {
-                                           goto AlternationBranch1;
-                                       }
-                                       
-                                       Utilities.StackPush(ref base.runstack!, ref stackpos, 1, alternation_starting_pos, alternation_starting_capturepos);
-                                       pos += 2;
-                                       slice = inputSpan.Slice(pos);
-                                       goto AlternationMatch;
-                                       
-                                       AlternationBranch1:
-                                       pos = alternation_starting_pos;
-                                       slice = inputSpan.Slice(pos);
-                                       UncaptureUntil(alternation_starting_capturepos);
-                                   //}
-                                   
-                                   // Branch 2
-                                   //{
-                                       // Match the string "\\;".
-                                       if (!slice.StartsWith("\\;"))
+                                       if ((uint)slice.Length < 2 ||
+                                           slice[0] != '\\' || // Match '\\'.
+                                           (((ch = slice[1]) != '#') & (ch != ';'))) // Match a character in the set [#;].
                                      {
                                          goto LoopIterationNoMatch;
                                      }
                                      
-                                       Utilities.StackPush(ref base.runstack!, ref stackpos, 2, alternation_starting_pos, alternation_starting_capturepos);
+                                       Utilities.StackPush(ref base.runstack!, ref stackpos, 1, alternation_starting_pos, alternation_starting_capturepos);
                                      pos += 2;
                                      slice = inputSpan.Slice(pos);
                                      goto AlternationMatch;
                                      case 0:
                                          goto AlternationBranch;
                                      case 1:
-                                           goto AlternationBranch1;
-                                       case 2:
                                          goto LoopIterationNoMatch;
                                  }
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25 ..." (1964 uses)
[GeneratedRegex("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$|^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\\-]*[a-zA-Z0-9])\\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\\-]*[A-Za-z0-9])$", RegexOptions.IgnoreCase)]
  ///         ○ Loop exactly 3 times.<br/>
  ///             ○ 1st capture group.<br/>
  ///                 ○ 2nd capture group.<br/>
-   ///                     ○ Match with 5 alternative expressions.<br/>
+   ///                     ○ Match with 4 alternative expressions.<br/>
  ///                         ○ Match a character in the set [0-9].<br/>
  ///                         ○ Match a sequence of expressions.<br/>
  ///                             ○ Match a character in the set [1-9].<br/>
  ///                             ○ Match a character in the set [0-9] exactly 2 times.<br/>
  ///                         ○ Match a sequence of expressions.<br/>
  ///                             ○ Match '2'.<br/>
-   ///                             ○ Match a character in the set [0-4].<br/>
-   ///                             ○ Match a character in the set [0-9].<br/>
-   ///                         ○ Match a sequence of expressions.<br/>
-   ///                             ○ Match the string "25".<br/>
-   ///                             ○ Match a character in the set [0-5].<br/>
+   ///                             ○ Match with 2 alternative expressions.<br/>
+   ///                                 ○ Match a sequence of expressions.<br/>
+   ///                                     ○ Match a character in the set [0-4].<br/>
+   ///                                     ○ Match a character in the set [0-9].<br/>
+   ///                                 ○ Match a sequence of expressions.<br/>
+   ///                                     ○ Match '5'.<br/>
+   ///                                     ○ Match a character in the set [0-5].<br/>
  ///                 ○ Match '.'.<br/>
  ///         ○ 3rd capture group.<br/>
-   ///             ○ Match with 5 alternative expressions.<br/>
+   ///             ○ Match with 4 alternative expressions.<br/>
  ///                 ○ Match a character in the set [0-9].<br/>
  ///                 ○ Match a sequence of expressions.<br/>
  ///                     ○ Match a character in the set [1-9].<br/>
  ///                     ○ Match a character in the set [0-9] exactly 2 times.<br/>
  ///                 ○ Match a sequence of expressions.<br/>
  ///                     ○ Match '2'.<br/>
-   ///                     ○ Match a character in the set [0-4].<br/>
-   ///                     ○ Match a character in the set [0-9].<br/>
-   ///                 ○ Match a sequence of expressions.<br/>
-   ///                     ○ Match the string "25".<br/>
-   ///                     ○ Match a character in the set [0-5].<br/>
+   ///                     ○ Match with 2 alternative expressions.<br/>
+   ///                         ○ Match a sequence of expressions.<br/>
+   ///                             ○ Match a character in the set [0-4].<br/>
+   ///                             ○ Match a character in the set [0-9].<br/>
+   ///                         ○ Match a sequence of expressions.<br/>
+   ///                             ○ Match '5'.<br/>
+   ///                             ○ Match a character in the set [0-5].<br/>
  ///         ○ Match if at the end of the string or if before an ending newline.<br/>
  ///     ○ Match a sequence of expressions.<br/>
  ///         ○ Loop greedily any number of times.<br/>
                                      //{
                                          int capture_starting_pos1 = pos;
                                          
-                                           // Match with 5 alternative expressions.
+                                           // Match with 4 alternative expressions.
                                          //{
                                              int alternation_starting_pos1 = pos;
                                              int alternation_starting_capturepos1 = base.Crawlpos();
                                              
                                              // Branch 3
                                              //{
-                                                   if ((uint)slice.Length < 3 ||
-                                                       slice[0] != '2' || // Match '2'.
-                                                       !char.IsBetween(slice[1], '0', '4') || // Match a character in the set [0-4].
-                                                       !char.IsAsciiDigit(slice[2])) // Match a character in the set [0-9].
-                                                   {
-                                                       goto AlternationBranch4;
-                                                   }
-                                                   
-                                                   Utilities.StackPush(ref base.runstack!, ref stackpos, 3, alternation_starting_pos1, alternation_starting_capturepos1);
-                                                   pos += 3;
-                                                   slice = inputSpan.Slice(pos);
-                                                   goto AlternationMatch1;
-                                                   
-                                                   AlternationBranch4:
-                                                   pos = alternation_starting_pos1;
-                                                   slice = inputSpan.Slice(pos);
-                                                   UncaptureUntil(alternation_starting_capturepos1);
-                                               //}
-                                               
-                                               // Branch 4
-                                               //{
-                                                   if ((uint)slice.Length < 3 ||
-                                                       !slice.StartsWith("25", StringComparison.OrdinalIgnoreCase) || // Match the string "25" (ordinal case-insensitive)
-                                                       !char.IsBetween(slice[2], '0', '5')) // Match a character in the set [0-5].
+                                                   // Match '2'.
+                                                   if (slice.IsEmpty || slice[0] != '2')
                                                  {
                                                      goto LoopIterationNoMatch;
                                                  }
                                                  
-                                                   Utilities.StackPush(ref base.runstack!, ref stackpos, 4, alternation_starting_pos1, alternation_starting_capturepos1);
-                                                   pos += 3;
-                                                   slice = inputSpan.Slice(pos);
+                                                   // Match with 2 alternative expressions.
+                                                   //{
+                                                       if ((uint)slice.Length < 2)
+                                                       {
+                                                           goto LoopIterationNoMatch;
+                                                       }
+                                                       
+                                                       switch (slice[1])
+                                                       {
+                                                           case '0' or '1' or '2' or '3' or '4':
+                                                               
+                                                               // Match a character in the set [0-9].
+                                                               if ((uint)slice.Length < 3 || !char.IsAsciiDigit(slice[2]))
+                                                               {
+                                                                   goto LoopIterationNoMatch;
+                                                               }
+                                                               
+                                                               pos += 3;
+                                                               slice = inputSpan.Slice(pos);
+                                                               break;
+                                                               
+                                                           case '5':
+                                                               
+                                                               // Match a character in the set [0-5].
+                                                               if ((uint)slice.Length < 3 || !char.IsBetween(slice[2], '0', '5'))
+                                                               {
+                                                                   goto LoopIterationNoMatch;
+                                                               }
+                                                               
+                                                               pos += 3;
+                                                               slice = inputSpan.Slice(pos);
+                                                               break;
+                                                               
+                                                           default:
+                                                               goto LoopIterationNoMatch;
+                                                       }
+                                                   //}
+                                                   
+                                                   Utilities.StackPush(ref base.runstack!, ref stackpos, 3, alternation_starting_pos1, alternation_starting_capturepos1);
                                                  goto AlternationMatch1;
                                              //}
                                              
                                                  case 2:
                                                      goto AlternationBranch3;
                                                  case 3:
-                                                       goto AlternationBranch4;
-                                                   case 4:
                                                      goto LoopIterationNoMatch;
                                              }
                                              
                              //{
                                  capture_starting_pos2 = pos;
                                  
-                                   // Match with 5 alternative expressions.
+                                   // Match with 4 alternative expressions.
                                  //{
                                      alternation_starting_pos2 = pos;
                                      alternation_starting_capturepos2 = base.Crawlpos();
                                          // Match a character in the set [0-9].
                                          if (slice.IsEmpty || !char.IsAsciiDigit(slice[0]))
                                          {
-                                               goto AlternationBranch5;
+                                               goto AlternationBranch4;
                                          }
                                          
                                          alternation_branch = 0;
                                          slice = inputSpan.Slice(pos);
                                          goto AlternationMatch2;
                                          
-                                           AlternationBranch5:
+                                           AlternationBranch4:
                                          pos = alternation_starting_pos2;
                                          slice = inputSpan.Slice(pos);
                                          UncaptureUntil(alternation_starting_capturepos2);
                                              !char.IsBetween(slice[0], '1', '9') || // Match a character in the set [1-9].
                                              !char.IsAsciiDigit(slice[1])) // Match a character in the set [0-9].
                                          {
-                                               goto AlternationBranch6;
+                                               goto AlternationBranch5;
                                          }
                                          
                                          alternation_branch = 1;
                                          slice = inputSpan.Slice(pos);
                                          goto AlternationMatch2;
                                          
-                                           AlternationBranch6:
+                                           AlternationBranch5:
                                          pos = alternation_starting_pos2;
                                          slice = inputSpan.Slice(pos);
                                          UncaptureUntil(alternation_starting_capturepos2);
                                              !char.IsAsciiDigit(slice[1]) || // Match a character in the set [0-9] exactly 2 times.
                                              !char.IsAsciiDigit(slice[2]))
                                          {
-                                               goto AlternationBranch7;
+                                               goto AlternationBranch6;
                                          }
                                          
                                          alternation_branch = 2;
                                          slice = inputSpan.Slice(pos);
                                          goto AlternationMatch2;
                                          
-                                           AlternationBranch7:
+                                           AlternationBranch6:
                                          pos = alternation_starting_pos2;
                                          slice = inputSpan.Slice(pos);
                                          UncaptureUntil(alternation_starting_capturepos2);
                                      
                                      // Branch 3
                                      //{
-                                           if ((uint)slice.Length < 3 ||
-                                               slice[0] != '2' || // Match '2'.
-                                               !char.IsBetween(slice[1], '0', '4') || // Match a character in the set [0-4].
-                                               !char.IsAsciiDigit(slice[2])) // Match a character in the set [0-9].
-                                           {
-                                               goto AlternationBranch8;
-                                           }
-                                           
-                                           alternation_branch = 3;
-                                           pos += 3;
-                                           slice = inputSpan.Slice(pos);
-                                           goto AlternationMatch2;
-                                           
-                                           AlternationBranch8:
-                                           pos = alternation_starting_pos2;
-                                           slice = inputSpan.Slice(pos);
-                                           UncaptureUntil(alternation_starting_capturepos2);
-                                       //}
-                                       
-                                       // Branch 4
-                                       //{
-                                           if ((uint)slice.Length < 3 ||
-                                               !slice.StartsWith("25", StringComparison.OrdinalIgnoreCase) || // Match the string "25" (ordinal case-insensitive)
-                                               !char.IsBetween(slice[2], '0', '5')) // Match a character in the set [0-5].
+                                           // Match '2'.
+                                           if (slice.IsEmpty || slice[0] != '2')
                                          {
                                              goto LoopBacktrack;
                                          }
                                          
-                                           alternation_branch = 4;
-                                           pos += 3;
-                                           slice = inputSpan.Slice(pos);
+                                           // Match with 2 alternative expressions.
+                                           //{
+                                               if ((uint)slice.Length < 2)
+                                               {
+                                                   goto LoopBacktrack;
+                                               }
+                                               
+                                               switch (slice[1])
+                                               {
+                                                   case '0' or '1' or '2' or '3' or '4':
+                                                       
+                                                       // Match a character in the set [0-9].
+                                                       if ((uint)slice.Length < 3 || !char.IsAsciiDigit(slice[2]))
+                                                       {
+                                                           goto LoopBacktrack;
+                                                       }
+                                                       
+                                                       pos += 3;
+                                                       slice = inputSpan.Slice(pos);
+                                                       break;
+                                                       
+                                                   case '5':
+                                                       
+                                                       // Match a character in the set [0-5].
+                                                       if ((uint)slice.Length < 3 || !char.IsBetween(slice[2], '0', '5'))
+                                                       {
+                                                           goto LoopBacktrack;
+                                                       }
+                                                       
+                                                       pos += 3;
+                                                       slice = inputSpan.Slice(pos);
+                                                       break;
+                                                       
+                                                   default:
+                                                       goto LoopBacktrack;
+                                               }
+                                           //}
+                                           
+                                           alternation_branch = 3;
                                          goto AlternationMatch2;
                                      //}
                                      
                                      switch (alternation_branch)
                                      {
                                          case 0:
-                                               goto AlternationBranch5;
+                                               goto AlternationBranch4;
                                          case 1:
-                                               goto AlternationBranch6;
+                                               goto AlternationBranch5;
                                          case 2:
-                                               goto AlternationBranch7;
+                                               goto AlternationBranch6;
                                          case 3:
-                                               goto AlternationBranch8;
-                                           case 4:
                                              goto LoopBacktrack;
                                      }
"^(?'protocol'\\w+\\:\\/\\/)?(?>(?'user'.*)@) ..." (283 uses)
[GeneratedRegex("^(?'protocol'\\w+\\:\\/\\/)?(?>(?'user'.*)@)?(?'endpoint'[^\\/:]+)(?>\\:(?'port'\\d+))?[\\/:](?'identifier'.*?)\\/?(?>\\.git)?$")]
  ///     ○ Match a character other than '\n' lazily any number of times.<br/>
  /// ○ Match '/' greedily, optionally.<br/>
  /// ○ Optional (greedy).<br/>
-   ///     ○ Atomic group.<br/>
-   ///         ○ Match the string ".git".<br/>
+   ///     ○ Match the string ".git".<br/>
  /// ○ Match if at the end of the string or if before an ending newline.<br/>
  /// </code>
  /// </remarks>
                  int loop_iteration2 = 0;
                  int loop_iteration3 = 0;
                  int stackpos = 0;
+                   int startingStackpos = 0;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  
                  // Match if at the beginning of the string.
                  //}
                  
                  // Optional (greedy).
-                   //{
+                   {
+                       startingStackpos = stackpos;
                      loop_iteration3 = 0;
                      
                      LoopBody3:
                      pos = base.runstack![--stackpos];
                      UncaptureUntil(base.runstack![--stackpos]);
                      slice = inputSpan.Slice(pos);
-                       LoopEnd3:;
-                   //}
+                       LoopEnd3:
+                       stackpos = startingStackpos; // Ensure any remaining backtracking state is removed.
+                   }
                  
                  // Match if at the end of the string or if before an ending newline.
                  if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
                  {
-                       goto LoopIterationNoMatch3;
+                       goto CharLoopBacktrack2;
                  }
                  
                  // The input matched.
"(?<timeOfDay>凌晨|清晨|早上|早|上午|中午|下午|午后|晚上|夜里|夜晚 ..." (186 uses)
[GeneratedRegex("(?<timeOfDay>凌晨|清晨|早上|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|夜间|深夜|傍晚|晚)", RegexOptions.ExplicitCapture | RegexOptions.Singleline)]
  ///             ○ Match '上' atomically, optionally.<br/>
  ///         ○ Match a sequence of expressions.<br/>
  ///             ○ Match '夜'.<br/>
-   ///             ○ Atomic group.<br/>
-   ///                 ○ Match a character in the set [\u665A\u91CC\u95F4].<br/>
+   ///             ○ Match a character in the set [\u665A\u91CC\u95F4].<br/>
  ///         ○ Match the string "半夜".<br/>
  ///         ○ Match the string "深夜".<br/>
  ///         ○ Match the string "傍晚".<br/>
"^(?'protocol'\\w+)?(\\:\\/\\/)?(?>(?'user'.* ..." (125 uses)
[GeneratedRegex("^(?'protocol'\\w+)?(\\:\\/\\/)?(?>(?'user'.*)@)?(?'endpoint'[^\\/:]+)(?>\\:(?'port'\\d+))?[\\/:](?'identifier'.*?)\\/?(?>\\.git)?$")]
  ///     ○ Match a character other than '\n' lazily any number of times.<br/>
  /// ○ Match '/' greedily, optionally.<br/>
  /// ○ Optional (greedy).<br/>
-   ///     ○ Atomic group.<br/>
-   ///         ○ Match the string ".git".<br/>
+   ///     ○ Match the string ".git".<br/>
  /// ○ Match if at the end of the string or if before an ending newline.<br/>
  /// </code>
  /// </remarks>
                  int loop_iteration3 = 0;
                  int loop_iteration4 = 0;
                  int stackpos = 0;
+                   int startingStackpos = 0;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  
                  // Match if at the beginning of the string.
                  //}
                  
                  // Optional (greedy).
-                   //{
+                   {
+                       startingStackpos = stackpos;
                      loop_iteration4 = 0;
                      
                      LoopBody4:
                      pos = base.runstack![--stackpos];
                      UncaptureUntil(base.runstack![--stackpos]);
                      slice = inputSpan.Slice(pos);
-                       LoopEnd4:;
-                   //}
+                       LoopEnd4:
+                       stackpos = startingStackpos; // Ensure any remaining backtracking state is removed.
+                   }
                  
                  // Match if at the end of the string or if before an ending newline.
                  if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
                  {
-                       goto LoopIterationNoMatch4;
+                       goto CharLoopBacktrack3;
                  }
                  
                  // The input matched.

For more diff examples, see https://gist.github.com/MihuBot/726ab2347ad8984e604bb47e9b493b8b

Sample source code for further analysis
// Local cache of the regex diff results; the archive is downloaded only when this file is missing.
const string JsonPath = "RegexResults-1833.json";
if (!File.Exists(JsonPath))
{
    // Dispose the client after the one-off download so its handler and connections are released.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/FKDmJWKA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First() throws InvalidOperationException when the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

// IncludeFields lets the serializer populate the value-tuple elements used by RegexEntry,
// since tuple elements are public fields rather than properties.
using FileStream jsonFileStream = File.OpenRead(JsonPath);
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidOperationException($"'{JsonPath}' did not contain any regex entries (the JSON root was null).");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// A regular expression pattern paired with the options it is used with.
/// </summary>
/// <param name="Pattern">The regular expression pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> associated with the pattern.</param>
/// <param name="Count">
/// Presumably the number of known uses of this pattern/options pair (the report lists "N uses"
/// next to each pattern) — confirm against the data.
/// </param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the results JSON array: a known pattern plus the text produced for it
/// on the two compared commits.
/// </summary>
sealed class RegexEntry
{
    /// <summary>The pattern, options, and count this entry describes.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>Presumably the source generated for the pattern on the main commit — confirm against the data.</summary>
    public required string MainSource { get; set; }

    /// <summary>Presumably the source generated for the pattern on the PR commit — confirm against the data.</summary>
    public required string PrSource { get; set; }

    /// <summary>Optional diff text; presumably the complete diff between the two sources. May be <see langword="null"/>.</summary>
    public string? FullDiff { get; set; }

    /// <summary>Optional diff text; presumably a condensed form of <see cref="FullDiff"/>. May be <see langword="null"/>.</summary>
    public string? ShortDiff { get; set; }

    /// <summary>
    /// Optional name/values pairs; presumably the character SearchValues instances emitted for the pattern.
    /// The elements are value tuples, so they are only populated when fields are included during deserialization.
    /// </summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>
    /// Optional string sets with their comparison type; presumably the string SearchValues instances emitted for the pattern.
    /// The elements are value tuples, so they are only populated when fields are included during deserialization.
    /// </summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions