Skip to content

[RegexDiff X64] [danmoseley] Handle single-node branches in ExtractCommonPre ... #1798

@MihuBot

Description

@MihuBot

Job completed in 19 minutes 22 seconds (remote runner delay: 1 minute 38 seconds).
dotnet/runtime#124881
Using arguments: regexdiff
Main commit: dotnet/runtime@133c7bd
PR commit: danmoseley/runtime@5aa5d8b

396 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"[^'\",]+'[^^']+'|[^'\",]+\"[^\"]+\"|[^,]+" (21563 uses)
[GeneratedRegex("[^'\",]+'[^^']+'|[^'\",]+\"[^\"]+\"|[^,]+")]
  /// <code>[^'",]+'[^^']+'|[^'",]+"[^"]+"|[^,]+</code><br/>
  /// Explanation:<br/>
  /// <code>
-   /// ○ Match with 3 alternative expressions, atomically.<br/>
+   /// ○ Match with 2 alternative expressions, atomically.<br/>
  ///     ○ Match a sequence of expressions.<br/>
  ///         ○ Match a character in the set [^"',] atomically at least once.<br/>
-   ///         ○ Match '\''.<br/>
-   ///         ○ Match a character in the set [^'^] atomically at least once.<br/>
-   ///         ○ Match '\''.<br/>
-   ///     ○ Match a sequence of expressions.<br/>
-   ///         ○ Match a character in the set [^"',] atomically at least once.<br/>
-   ///         ○ Match '"'.<br/>
-   ///         ○ Match a character other than '"' atomically at least once.<br/>
-   ///         ○ Match '"'.<br/>
+   ///         ○ Match with 2 alternative expressions, atomically.<br/>
+   ///             ○ Match a sequence of expressions.<br/>
+   ///                 ○ Match '\''.<br/>
+   ///                 ○ Match a character in the set [^'^] atomically at least once.<br/>
+   ///                 ○ Match '\''.<br/>
+   ///             ○ Match a sequence of expressions.<br/>
+   ///                 ○ Match '"'.<br/>
+   ///                 ○ Match a character other than '"' atomically at least once.<br/>
+   ///                 ○ Match '"'.<br/>
  ///     ○ Match a character other than ',' atomically at least once.<br/>
  /// </code>
  /// </remarks>
                  int matchStart = pos;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  
-                   // Match with 3 alternative expressions, atomically.
+                   // Match with 2 alternative expressions, atomically.
                  {
                      int alternation_starting_pos = pos;
                      
                              pos += iteration;
                          }
                          
-                           // Match '\''.
-                           if (slice.IsEmpty || slice[0] != '\'')
+                           // Match with 2 alternative expressions, atomically.
                          {
-                               goto AlternationBranch;
-                           }
-                           
-                           // Match a character in the set [^'^] atomically at least once.
-                           {
-                               int iteration1 = slice.Slice(1).IndexOfAny('\'', '^');
-                               if (iteration1 < 0)
-                               {
-                                   iteration1 = slice.Length - 1;
-                               }
-                               
-                               if (iteration1 == 0)
+                               if (slice.IsEmpty)
                              {
                                  goto AlternationBranch;
                              }
                              
-                               slice = slice.Slice(iteration1);
-                               pos += iteration1;
+                               switch (slice[0])
+                               {
+                                   case '\'':
+                                       
+                                       // Match a character in the set [^'^] atomically at least once.
+                                       {
+                                           int iteration1 = slice.Slice(1).IndexOfAny('\'', '^');
+                                           if (iteration1 < 0)
+                                           {
+                                               iteration1 = slice.Length - 1;
+                                           }
+                                           
+                                           if (iteration1 == 0)
+                                           {
+                                               goto AlternationBranch;
+                                           }
+                                           
+                                           slice = slice.Slice(iteration1);
+                                           pos += iteration1;
+                                       }
+                                       
+                                       // Match '\''.
+                                       if ((uint)slice.Length < 2 || slice[1] != '\'')
+                                       {
+                                           goto AlternationBranch;
+                                       }
+                                       
+                                       pos += 2;
+                                       slice = inputSpan.Slice(pos);
+                                       break;
+                                       
+                                   case '"':
+                                       
+                                       // Match a character other than '"' atomically at least once.
+                                       {
+                                           int iteration2 = slice.Slice(1).IndexOf('"');
+                                           if (iteration2 < 0)
+                                           {
+                                               iteration2 = slice.Length - 1;
+                                           }
+                                           
+                                           if (iteration2 == 0)
+                                           {
+                                               goto AlternationBranch;
+                                           }
+                                           
+                                           slice = slice.Slice(iteration2);
+                                           pos += iteration2;
+                                       }
+                                       
+                                       // Match '"'.
+                                       if ((uint)slice.Length < 2 || slice[1] != '"')
+                                       {
+                                           goto AlternationBranch;
+                                       }
+                                       
+                                       pos += 2;
+                                       slice = inputSpan.Slice(pos);
+                                       break;
+                                       
+                                   default:
+                                       goto AlternationBranch;
+                               }
                          }
                          
-                           // Match '\''.
-                           if ((uint)slice.Length < 2 || slice[1] != '\'')
-                           {
-                               goto AlternationBranch;
-                           }
-                           
-                           pos += 2;
-                           slice = inputSpan.Slice(pos);
                          goto AlternationMatch;
                          
                          AlternationBranch:
                      
                      // Branch 1
                      {
-                           // Match a character in the set [^"',] atomically at least once.
+                           // Match a character other than ',' atomically at least once.
                          {
-                               int iteration2 = slice.IndexOfAny('"', '\'', ',');
-                               if (iteration2 < 0)
-                               {
-                                   iteration2 = slice.Length;
-                               }
-                               
-                               if (iteration2 == 0)
-                               {
-                                   goto AlternationBranch1;
-                               }
-                               
-                               slice = slice.Slice(iteration2);
-                               pos += iteration2;
-                           }
-                           
-                           // Match '"'.
-                           if (slice.IsEmpty || slice[0] != '"')
-                           {
-                               goto AlternationBranch1;
-                           }
-                           
-                           // Match a character other than '"' atomically at least once.
-                           {
-                               int iteration3 = slice.Slice(1).IndexOf('"');
+                               int iteration3 = slice.IndexOf(',');
                              if (iteration3 < 0)
                              {
-                                   iteration3 = slice.Length - 1;
+                                   iteration3 = slice.Length;
                              }
                              
                              if (iteration3 == 0)
                              {
-                                   goto AlternationBranch1;
+                                   return false; // The input didn't match.
                              }
                              
                              slice = slice.Slice(iteration3);
                              pos += iteration3;
                          }
                          
-                           // Match '"'.
-                           if ((uint)slice.Length < 2 || slice[1] != '"')
-                           {
-                               goto AlternationBranch1;
-                           }
-                           
-                           pos += 2;
-                           slice = inputSpan.Slice(pos);
-                           goto AlternationMatch;
-                           
-                           AlternationBranch1:
-                           pos = alternation_starting_pos;
-                           slice = inputSpan.Slice(pos);
-                       }
-                       
-                       // Branch 2
-                       {
-                           // Match a character other than ',' atomically at least once.
-                           {
-                               int iteration4 = slice.IndexOf(',');
-                               if (iteration4 < 0)
-                               {
-                                   iteration4 = slice.Length;
-                               }
-                               
-                               if (iteration4 == 0)
-                               {
-                                   return false; // The input didn't match.
-                               }
-                               
-                               slice = slice.Slice(iteration4);
-                               pos += iteration4;
-                           }
-                           
                      }
                      
                      AlternationMatch:;
"^(http|https)\\://[a-zA-Z0-9\\-\\.]+(:[a-zA- ..." (821 uses)
[GeneratedRegex("^(http|https)\\://[a-zA-Z0-9\\-\\.]+(:[a-zA-Z0-9]*)?(/[a-zA-Z0-9\\-\\._]*)*$", RegexOptions.IgnoreCase)]
  /// ○ 1st capture group.<br/>
  ///     ○ Match a character in the set [Hh].<br/>
  ///     ○ Match a character in the set [Tt] exactly 2 times.<br/>
-   ///     ○ Match with 2 alternative expressions.<br/>
-   ///         ○ Match a character in the set [Pp].<br/>
-   ///         ○ Match a sequence of expressions.<br/>
-   ///             ○ Match a character in the set [Pp].<br/>
-   ///             ○ Match a character in the set [Ss].<br/>
+   ///     ○ Match a character in the set [Pp].<br/>
+   ///     ○ Match a character in the set [Ss] atomically, optionally.<br/>
  /// ○ Match the string "://".<br/>
  /// ○ Match a character in the set [\-.0-9A-Za-z\u212A] greedily at least once.<br/>
  /// ○ Optional (greedy).<br/>
              {
                  int pos = base.runtextpos;
                  int matchStart = pos;
-                   int alternation_branch = 0;
-                   int alternation_starting_capturepos = 0;
-                   int alternation_starting_pos = 0;
                  int capture_starting_pos = 0;
                  int charloop_capture_pos = 0;
                  int charloop_starting_pos = 0, charloop_ending_pos = 0;
                  }
                  
                  // 1st capture group.
-                   //{
+                   {
                      capture_starting_pos = pos;
                      
-                       if ((uint)slice.Length < 3 ||
-                           !slice.StartsWith("htt", StringComparison.OrdinalIgnoreCase)) // Match the string "htt" (ordinal case-insensitive)
+                       if ((uint)slice.Length < 4 ||
+                           !slice.StartsWith("http", StringComparison.OrdinalIgnoreCase)) // Match the string "http" (ordinal case-insensitive)
                      {
                          UncaptureUntil(0);
                          return false; // The input didn't match.
                      }
                      
-                       // Match with 2 alternative expressions.
-                       //{
-                           alternation_starting_pos = pos;
-                           alternation_starting_capturepos = base.Crawlpos();
-                           
-                           // Branch 0
-                           //{
-                               // Match a character in the set [Pp].
-                               if ((uint)slice.Length < 4 || ((slice[3] | 0x20) != 'p'))
-                               {
-                                   goto AlternationBranch;
-                               }
-                               
-                               alternation_branch = 0;
-                               pos += 4;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                               
-                               AlternationBranch:
-                               pos = alternation_starting_pos;
-                               slice = inputSpan.Slice(pos);
-                               UncaptureUntil(alternation_starting_capturepos);
-                           //}
-                           
-                           // Branch 1
-                           //{
-                               if ((uint)slice.Length < 5 ||
-                                   !slice.Slice(3).StartsWith("ps", StringComparison.OrdinalIgnoreCase)) // Match the string "ps" (ordinal case-insensitive)
-                               {
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                               }
-                               
-                               alternation_branch = 1;
-                               pos += 5;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                           //}
-                           
-                           AlternationBacktrack:
-                           if (Utilities.s_hasTimeout)
+                       // Match a character in the set [Ss] atomically, optionally.
+                       {
+                           if ((uint)slice.Length > (uint)4 && ((slice[4] | 0x20) == 's'))
                          {
-                               base.CheckTimeout();
+                               slice = slice.Slice(1);
+                               pos++;
                          }
-                           
-                           switch (alternation_branch)
-                           {
-                               case 0:
-                                   goto AlternationBranch;
-                               case 1:
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                           }
-                           
-                           AlternationMatch:;
-                       //}
+                       }
                      
+                       pos += 4;
+                       slice = inputSpan.Slice(pos);
                      base.Capture(1, capture_starting_pos, pos);
-                       
-                       goto CaptureSkipBacktrack;
-                       
-                       CaptureBacktrack:
-                       goto AlternationBacktrack;
-                       
-                       CaptureSkipBacktrack:;
-                   //}
+                   }
                  
                  // Match the string "://".
                  if (!slice.StartsWith("://"))
                  {
-                       goto CaptureBacktrack;
+                       UncaptureUntil(0);
+                       return false; // The input didn't match.
                  }
                  
                  // Match a character in the set [\-.0-9A-Za-z\u212A] greedily at least once.
                      
                      if (iteration == 0)
                      {
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      slice = slice.Slice(iteration);
                      
                      if (charloop_starting_pos >= charloop_ending_pos)
                      {
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      pos = --charloop_ending_pos;
                      slice = inputSpan.Slice(pos);
                          base.Capture(2, capture_starting_pos1, pos);
                          
                          Utilities.StackPush(ref base.runstack!, ref stackpos, capture_starting_pos1);
-                           goto CaptureSkipBacktrack1;
+                           goto CaptureSkipBacktrack;
                          
-                           CaptureBacktrack1:
+                           CaptureBacktrack:
                          capture_starting_pos1 = base.runstack![--stackpos];
                          goto CharLoopBacktrack1;
                          
-                           CaptureSkipBacktrack1:;
+                           CaptureSkipBacktrack:;
                      //}
                      
                      
                          // No iterations of the loop remain to backtrack into. Fail the loop.
                          goto CharLoopBacktrack;
                      }
-                       goto CaptureBacktrack1;
+                       goto CaptureBacktrack;
                      LoopEnd:;
                  //}
"^[\\s\\S]+?(?=[\\\\<!\\[*`~\\:]|\\b_|\\bhttp ..." (774 uses)
[GeneratedRegex("^[\\s\\S]+?(?=[\\\\<!\\[*`~\\:]|\\b_|\\bhttps?:\\/\\/| {2,}\\n|$)")]
  /// ○ Match if at the beginning of the string.<br/>
  /// ○ Match any character lazily at least once.<br/>
  /// ○ Zero-width positive lookahead.<br/>
-   ///     ○ Match with 5 alternative expressions, atomically.<br/>
+   ///     ○ Match with 4 alternative expressions, atomically.<br/>
  ///         ○ Match a character in the set [!*:&lt;[\\`~].<br/>
  ///         ○ Match a sequence of expressions.<br/>
  ///             ○ Match if at a word boundary.<br/>
-   ///             ○ Match '_'.<br/>
-   ///         ○ Match a sequence of expressions.<br/>
-   ///             ○ Match if at a word boundary.<br/>
-   ///             ○ Match the string "http".<br/>
-   ///             ○ Match 's' atomically, optionally.<br/>
-   ///             ○ Match the string "://".<br/>
+   ///             ○ Match with 2 alternative expressions, atomically.<br/>
+   ///                 ○ Match '_'.<br/>
+   ///                 ○ Match a sequence of expressions.<br/>
+   ///                     ○ Match the string "http".<br/>
+   ///                     ○ Match 's' atomically, optionally.<br/>
+   ///                     ○ Match the string "://".<br/>
  ///         ○ Match a sequence of expressions.<br/>
  ///             ○ Match ' ' atomically at least twice.<br/>
  ///             ○ Match '\n'.<br/>
                      
                      int atomic_stackpos = stackpos;
                      
-                       // Match with 5 alternative expressions, atomically.
+                       // Match with 4 alternative expressions, atomically.
                      {
                          int alternation_starting_pos = pos;
                          
                                  goto AlternationBranch1;
                              }
                              
-                               // Match '_'.
-                               if (slice.IsEmpty || slice[0] != '_')
+                               // Match with 2 alternative expressions, atomically.
                              {
-                                   goto AlternationBranch1;
+                                   if (slice.IsEmpty)
+                                   {
+                                       goto AlternationBranch1;
+                                   }
+                                   
+                                   switch (slice[0])
+                                   {
+                                       case '_':
+                                           pos++;
+                                           slice = inputSpan.Slice(pos);
+                                           break;
+                                           
+                                       case 'h':
+                                           // Match the string "ttp".
+                                           if (!slice.Slice(1).StartsWith("ttp"))
+                                           {
+                                               goto AlternationBranch1;
+                                           }
+                                           
+                                           // Match 's' atomically, optionally.
+                                           {
+                                               if ((uint)slice.Length > (uint)4 && slice[4] == 's')
+                                               {
+                                                   slice = slice.Slice(1);
+                                                   pos++;
+                                               }
+                                           }
+                                           
+                                           // Match the string "://".
+                                           if (!slice.Slice(4).StartsWith("://"))
+                                           {
+                                               goto AlternationBranch1;
+                                           }
+                                           
+                                           pos += 7;
+                                           slice = inputSpan.Slice(pos);
+                                           break;
+                                           
+                                       default:
+                                           goto AlternationBranch1;
+                                   }
                              }
                              
-                               pos++;
-                               slice = inputSpan.Slice(pos);
                              goto AlternationMatch;
                              
                              AlternationBranch1:
                          }
                          
                          // Branch 2
-                           {
-                               // Match if at a word boundary.
-                               if (!Utilities.IsPreWordCharBoundary(inputSpan, pos))
-                               {
-                                   goto AlternationBranch2;
-                               }
-                               
-                               // Match the string "http".
-                               if (!slice.StartsWith("http"))
-                               {
-                                   goto AlternationBranch2;
-                               }
-                               
-                               // Match 's' atomically, optionally.
-                               {
-                                   if ((uint)slice.Length > (uint)4 && slice[4] == 's')
-                                   {
-                                       slice = slice.Slice(1);
-                                       pos++;
-                                   }
-                               }
-                               
-                               // Match the string "://".
-                               if (!slice.Slice(4).StartsWith("://"))
-                               {
-                                   goto AlternationBranch2;
-                               }
-                               
-                               pos += 7;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                               
-                               AlternationBranch2:
-                               pos = alternation_starting_pos;
-                               slice = inputSpan.Slice(pos);
-                           }
-                           
-                           // Branch 3
                          {
                              // Match ' ' atomically at least twice.
                              {
                                  
                                  if (iteration < 2)
                                  {
-                                       goto AlternationBranch3;
+                                       goto AlternationBranch2;
                                  }
                                  
                                  slice = slice.Slice(iteration);
                              // Match '\n'.
                              if (slice.IsEmpty || slice[0] != '\n')
                              {
-                                   goto AlternationBranch3;
+                                   goto AlternationBranch2;
                              }
                              
                              pos++;
                              slice = inputSpan.Slice(pos);
                              goto AlternationMatch;
                              
-                               AlternationBranch3:
+                               AlternationBranch2:
                              pos = alternation_starting_pos;
                              slice = inputSpan.Slice(pos);
                          }
                          
-                           // Branch 4
+                           // Branch 3
                          {
                              // Match if at the end of the string or if before an ending newline.
                              if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
"\\G(?:[\"“”]|\\s|\\\\[@#*]|\\\\[@#*bfmv])" (33 uses)
[GeneratedRegex("\\G(?:[\"“”]|\\s|\\\\[@#*]|\\\\[@#*bfmv])")]
  /// Explanation:<br/>
  /// <code>
  /// ○ Match if at the start position.<br/>
-   /// ○ Match with 3 alternative expressions, atomically.<br/>
+   /// ○ Match with 2 alternative expressions, atomically.<br/>
  ///     ○ Match a character in the set ["\u201C\u201D\s].<br/>
  ///     ○ Match a sequence of expressions.<br/>
  ///         ○ Match '\\'.<br/>
-   ///         ○ Match a character in the set [#*@].<br/>
-   ///     ○ Match a sequence of expressions.<br/>
-   ///         ○ Match '\\'.<br/>
  ///         ○ Match a character in the set [#*@bfmv].<br/>
  /// </code>
  /// </remarks>
                      return false; // The input didn't match.
                  }
                  
-                   // Match with 3 alternative expressions, atomically.
+                   // Match with 2 alternative expressions, atomically.
                  {
                      int alternation_starting_pos = pos;
                      
                      }
                      
                      // Branch 1
-                       {
-                           if ((uint)slice.Length < 2 ||
-                               slice[0] != '\\' || // Match '\\'.
-                               (((ch = slice[1]) != '#') & (ch != '*') & (ch != '@'))) // Match a character in the set [#*@].
-                           {
-                               goto AlternationBranch1;
-                           }
-                           
-                           pos += 2;
-                           slice = inputSpan.Slice(pos);
-                           goto AlternationMatch;
-                           
-                           AlternationBranch1:
-                           pos = alternation_starting_pos;
-                           slice = inputSpan.Slice(pos);
-                       }
-                       
-                       // Branch 2
                      {
                          if ((uint)slice.Length < 2 ||
                              slice[0] != '\\' || // Match '\\'.

For more diff examples, see https://gist.github.com/MihuBot/8b933494bb1466554d325dc4ca9fc8d4

JIT assembly changes
Total bytes of base: 54284087
Total bytes of diff: 54418439
Total bytes of delta: 134352 (0.25 % of base)
Total relative delta: 50.97
    diff is a regression.
    relative diff is a regression.

For a list of JIT diff regressions, see Regressions.md
For a list of JIT diff improvements, see Improvements.md

Sample source code for further analysis
const string JsonPath = "RegexResults-1798.json";
if (!File.Exists(JsonPath))
{
    await using var archiveStream = await new HttpClient().GetStreamAsync("https://mihubot.xyz/r/FH1OjmHA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })!;
Console.WriteLine($"Working with {entries.Length} patterns");



record KnownPattern(string Pattern, RegexOptions Options, int Count);

sealed class RegexEntry
{
    public required KnownPattern Regex { get; set; }
    public required string MainSource { get; set; }
    public required string PrSource { get; set; }
    public string? FullDiff { get; set; }
    public string? ShortDiff { get; set; }
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions