Skip to content

Commit

Permalink
Simplify package ID regex (#8453)
Browse files Browse the repository at this point in the history
Also refactors some regex helpers.
  • Loading branch information
loic-sharma committed Mar 15, 2021
1 parent a79b0f0 commit 25d2d3b
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 73 deletions.
119 changes: 119 additions & 0 deletions src/NuGetGallery.Core/Extensions/RegexEx.cs
@@ -0,0 +1,119 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Text.RegularExpressions;

namespace NuGetGallery
{
/// <summary>
/// Helpers that run regular expressions with a shared default match timeout, so that a
/// pathological pattern/input combination cannot run unbounded.
/// </summary>
public static class RegexEx
{
    // This timeout must be short enough to prevent runaway regular expressions,
    // but long enough to prevent reliability issues across all our regular expressions.
    private static readonly TimeSpan Timeout = TimeSpan.FromSeconds(15);

    /// <summary>
    /// Creates a new instance of the <see cref="Regex"/> class with a default timeout configured
    /// for the pattern matching method to attempt a match.
    /// </summary>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="options">A bitwise combination of the enumeration values that modify the expression.</param>
    /// <returns>A regular expression instance that can be used to match inputs.</returns>
    public static Regex CreateWithTimeout(string pattern, RegexOptions options)
    {
        return new Regex(pattern, options, Timeout);
    }

    /// <summary>
    /// In a specific input string, replaces all substrings that match a specified regular expression
    /// with a specified replacement string.
    /// Throws a <see cref="RegexMatchTimeoutException"/> if the timeout is reached.
    /// </summary>
    /// <param name="input">The string to search for matches.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string.</param>
    /// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
    /// <returns>A new string with the matches replaced.</returns>
    /// <exception cref="RegexMatchTimeoutException">Thrown if the matches exceed the default timeout.</exception>
    public static string ReplaceWithTimeout(
        string input,
        string pattern,
        string replacement,
        RegexOptions options)
    {
        return Regex.Replace(input, pattern, replacement, options, Timeout);
    }

    /// <summary>
    /// In a specific input string, replaces all substrings that match a specified regular expression
    /// with the string produced by <paramref name="evaluator"/>.
    /// Only a match timeout is swallowed; any other exception propagates to the caller.
    /// </summary>
    /// <param name="input">The string to search for matches.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="evaluator">The handler to replace matches.</param>
    /// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
    /// <returns>A new string with the matches replaced, or the original string if the matches timeout.</returns>
    public static string ReplaceWithTimeoutOrOriginal(
        string input,
        string pattern,
        MatchEvaluator evaluator,
        RegexOptions options)
    {
        try
        {
            return Regex.Replace(input, pattern, evaluator, options, Timeout);
        }
        catch (RegexMatchTimeoutException)
        {
            return input;
        }
    }

    /// <summary>
    /// Searches the input string for the first occurrence of the specified regular expression,
    /// using the specified matching options and the default time-out interval.
    /// </summary>
    /// <param name="input">The string to search for a match.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
    /// <returns>An object that contains information about the match, or <c>null</c> if and only if the match timed out.</returns>
    public static Match MatchWithTimeoutOrNull(
        string input,
        string pattern,
        RegexOptions options)
    {
        try
        {
            return Regex.Match(input, pattern, options, Timeout);
        }
        catch (RegexMatchTimeoutException)
        {
            return null;
        }
    }

    /// <summary>
    /// Searches the input string for all occurrences of the specified regular expression,
    /// using the specified matching options and the default time-out interval.
    /// The returned collection is fully populated before this method returns.
    /// </summary>
    /// <param name="input">The string to search for a match.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
    /// <returns>
    /// A collection of the matches found by the search.
    /// If no matches are found, the method returns an empty collection.
    /// If and only if the matches timeout, returns <c>null</c>.</returns>
    public static MatchCollection MatchesWithTimeoutOrNull(
        string input,
        string pattern,
        RegexOptions options)
    {
        try
        {
            var matches = Regex.Matches(input, pattern, options, Timeout);

            // Regex.Matches populates its collection lazily, so a timeout would otherwise
            // surface later, in the caller, outside of this try/catch. Reading Count forces
            // every match to be evaluated here so that the timeout is turned into null.
            _ = matches.Count;

            return matches;
        }
        catch (RegexMatchTimeoutException)
        {
            return null;
        }
    }
}
}
4 changes: 3 additions & 1 deletion src/NuGetGallery.Core/NuGetVersionExtensions.cs
Expand Up @@ -36,7 +36,9 @@ public static string ToFullString(string version)
public static class NuGetVersionExtensions
{
private const RegexOptions SemanticVersionRegexFlags = RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture;
private static readonly Regex SemanticVersionRegex = new Regex(@"^(?<Version>\d+(\s*\.\s*\d+){0,3})(?<Release>-[a-z][0-9a-z-]*)?$", SemanticVersionRegexFlags);
private static readonly Regex SemanticVersionRegex = RegexEx.CreateWithTimeout(
@"^(?<Version>\d+(\s*\.\s*\d+){0,3})(?<Release>-[a-z][0-9a-z-]*)?$",
SemanticVersionRegexFlags);

public static string ToNormalizedStringSafe(this NuGetVersion self)
{
Expand Down
4 changes: 3 additions & 1 deletion src/NuGetGallery.Core/Packaging/PackageIdValidator.cs
Expand Up @@ -10,7 +10,9 @@ namespace NuGetGallery.Packaging
{
public static class PackageIdValidator
{
private static readonly Regex IdRegex = new Regex(@"^\w+([_.-]\w+)*$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);
private static readonly Regex IdRegex = RegexEx.CreateWithTimeout(
@"^\w+([.-]\w+)*$",
RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

public static bool IsValidPackageId(string packageId)
{
Expand Down
@@ -1,5 +1,7 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
Expand Down Expand Up @@ -173,8 +175,8 @@ private string ParseContentDisposition(byte[] buffer)
//
// We want to extract the file name out of it.
string content = _encoding.GetString(buffer);
var match = Regex.Match(content, @"filename\=""(.*)\""");
if (match.Success && match.Groups.Count > 1)
var match = RegexEx.MatchWithTimeoutOrNull(content, @"filename\=""(.*)\""", RegexOptions.None);
if (match != null && match.Success && match.Groups.Count > 1)
{
string filename = match.Groups[1].Value;
return filename;
Expand Down
Expand Up @@ -17,7 +17,7 @@ public static class NuGetPackagePattern
/// <returns><c>true</c> if the string matches the given pattern; otherwise <c>false</c>.</returns>
public static bool MatchesPackagePattern(this string str, string globPattern)
{
return new Regex(
return RegexEx.CreateWithTimeout(
"^" + Regex.Escape(globPattern).Replace(@"\*", ".*") + "$",
RegexOptions.IgnoreCase | RegexOptions.Singleline
).IsMatch(str);
Expand Down
Expand Up @@ -16,7 +16,9 @@ namespace NuGetGallery.Authentication.Providers
public abstract class Authenticator
{
public const string AuthPrefix = "Auth.";
private static readonly Regex NameShortener = new Regex(@"^(?<shortname>[A-Za-z0-9_]*)Authenticator$");
private static readonly Regex NameShortener = RegexEx.CreateWithTimeout(
@"^(?<shortname>[A-Za-z0-9_]*)Authenticator$",
RegexOptions.None);

public AuthenticatorConfiguration BaseConfig { get; private set; }

Expand Down
Expand Up @@ -15,7 +15,9 @@ namespace NuGetGallery
{
public class ReservedNamespaceService : IReservedNamespaceService
{
private static readonly Regex NamespaceRegex = new Regex(@"^\w+([_.-]\w+)*[.-]?$", RegexOptions.Compiled | RegexOptions.ExplicitCapture);
private static readonly Regex NamespaceRegex = RegexEx.CreateWithTimeout(
@"^\w+([.-]\w+)*[.-]?$",
RegexOptions.Compiled | RegexOptions.ExplicitCapture);

public IEntitiesContext EntitiesContext { get; protected set; }
public IEntityRepository<ReservedNamespace> ReservedNamespaceRepository { get; protected set; }
Expand Down
6 changes: 3 additions & 3 deletions src/NuGetGallery/Helpers/HtmlExtensions.cs
Expand Up @@ -69,7 +69,7 @@ void appendText(StringBuilder builder, string inputText)
encodedText = encodedText.Replace("\n", "<br />");

// Replace more than one space in a row with a space then &nbsp;.
encodedText = RegexEx.TryReplaceWithTimeout(
encodedText = RegexEx.ReplaceWithTimeoutOrOriginal(
encodedText,
" +",
match => " " + string.Join(string.Empty, Enumerable.Repeat("&nbsp;", match.Value.Length - 1)),
Expand Down Expand Up @@ -101,7 +101,7 @@ void appendUrl(StringBuilder builder, string inputText)
string siteRoot = configurationService.GetSiteRoot(useHttps: true);

// Format links to NuGet packages
Match packageMatch = RegexEx.MatchWithTimeout(
Match packageMatch = RegexEx.MatchWithTimeoutOrNull(
formattedUri,
$@"({Regex.Escape(siteRoot)}\/packages\/(?<name>\w+([_.-]\w+)*(\/[0-9a-zA-Z-.]+)?)\/?$)",
RegexOptions.IgnoreCase);
Expand All @@ -124,7 +124,7 @@ void appendUrl(StringBuilder builder, string inputText)

// Turn HTTP and HTTPS URLs into links.
// Source: https://stackoverflow.com/a/4750468
var matches = RegexEx.MatchesWithTimeout(
var matches = RegexEx.MatchesWithTimeoutOrNull(
text,
@"((http|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&amp;:/~\+#]*[\w\-\@?^=%&amp;/~\+#])?)",
RegexOptions.IgnoreCase);
Expand Down
59 changes: 0 additions & 59 deletions src/NuGetGallery/Helpers/RegexEx.cs

This file was deleted.

1 change: 0 additions & 1 deletion src/NuGetGallery/NuGetGallery.csproj
Expand Up @@ -362,7 +362,6 @@
<Compile Include="Helpers\StreamHelper.cs" />
<Compile Include="Helpers\TextHelper.cs" />
<Compile Include="Helpers\ZipArchiveHelpers.cs" />
<Compile Include="Helpers\RegexEx.cs" />
<Compile Include="Helpers\RouteUrlTemplate.cs" />
<Compile Include="Infrastructure\Lucene\HttpClientWrapper.cs" />
<Compile Include="Infrastructure\Lucene\IResilientSearchClient.cs" />
Expand Down
2 changes: 1 addition & 1 deletion src/NuGetGallery/Services/ReadMeService.cs
Expand Up @@ -320,7 +320,7 @@ private static async Task<string> ReadMaxAsync(Stream stream, int maxSize, Encod
return encoding.GetString(buffer).Trim('\0');
}

private static readonly Regex NewLineRegex = new Regex(@"\n|\r\n");
private static readonly Regex NewLineRegex = RegexEx.CreateWithTimeout(@"\n|\r\n", RegexOptions.None);

private static string NormalizeNewLines(string content)
{
Expand Down
4 changes: 2 additions & 2 deletions src/NuGetGallery/Services/TyposquattingDistanceCalculation.cs
Expand Up @@ -41,8 +41,8 @@ public static bool IsDistanceLessThanThreshold(string str1, string str2, int thr
throw new ArgumentNullException(nameof(str2));
}

var newStr1 = Regex.Replace(str1, SpecialCharactersToString, string.Empty);
var newStr2 = Regex.Replace(str2, SpecialCharactersToString, string.Empty);
var newStr1 = RegexEx.ReplaceWithTimeout(str1, SpecialCharactersToString, string.Empty, RegexOptions.None);
var newStr2 = RegexEx.ReplaceWithTimeout(str2, SpecialCharactersToString, string.Empty, RegexOptions.None);
if (Math.Abs(newStr1.Length - newStr2.Length) > threshold)
{
return false;
Expand Down

0 comments on commit 25d2d3b

Please sign in to comment.