Skip to content

Commit

Permalink
Use PCRE library for regex (#76)
Browse files Browse the repository at this point in the history
  • Loading branch information
Marusyk committed Sep 1, 2023
1 parent 1923625 commit dc2c26f
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 46 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Cross platform .NET grok implementation as a NuGet package
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/Marusyk/grok.net/blob/main/LICENSE)
[![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/Marusyk/grok.net/blob/main/CONTRIBUTING.md)

[![NuGet version](https://badge.fury.io/nu/grok.net.svg)](https://badge.fury.io/nu/grok.net)
[![NuGet version](https://img.shields.io/nuget/v/grok.net.svg?logo=NuGet)](https://www.nuget.org/packages/grok.net)
[![Nuget](https://img.shields.io/nuget/dt/grok.net.svg)](https://www.nuget.org/packages/Grok.Net)
[![PowerShell Gallery Version](https://img.shields.io/powershellgallery/v/Grok)](https://www.powershellgallery.com/packages/Grok)
[![PowerShell Gallery](https://img.shields.io/powershellgallery/dt/Grok)](https://www.powershellgallery.com/packages/Grok)
Expand Down
34 changes: 16 additions & 18 deletions benchmark/ParseBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,53 @@ public class ParseBenchmark
private static readonly Grok _grokCustom = new("%{ZIPCODE:zipcode}:%{EMAILADDRESS:email}");

[Benchmark]
public void Empty()
public GrokResult Empty()
{
_ = _grokEmpty.Parse("");
return _grokEmpty.Parse("");
}

[Benchmark]
public void Custom()
public GrokResult Custom()
{
_ = _grokCustom.Parse("06590:halil.i.kocaoz@gmail.com");
return _grokCustom.Parse("06590:halil.i.kocaoz@gmail.com");
}

[Benchmark]
public void Log()
public GrokResult Log()
{
_ = _grokLog.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
return _grokLog.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
06-21-19 21:00:13:589265;156;WARN;main;DECODED: 775233900043 EMPTY DISTANCE: --------");
}

[Params("DBG", "INF", "WARN", "ERR")]
public string LogLevel { get; set; }

[Benchmark]
public void LogWithParam()
public bool LogWithParam()
{
const string logLevel = "INF";
GrokResult grokResult = _grokLog.Parse($@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
06-21-19 21:00:13:589265;156;{LogLevel};main;DECODED: 775233900043 EMPTY DISTANCE: --------");
06-21-19 21:00:13:589265;156;{logLevel};main;DECODED: 775233900043 EMPTY DISTANCE: --------");

bool resut = (string)grokResult[0].Value == LogLevel;
return (string)grokResult[0].Value == logLevel;
}

[Benchmark]
public void EmptyLocal()
public GrokResult EmptyLocal()
{
Grok grokEmptyLocal = new Grok("");
_ = grokEmptyLocal.Parse("");
return grokEmptyLocal.Parse("");
}

[Benchmark]
public void CustomLocal()
public GrokResult CustomLocal()
{
Grok grokCustomLocal = new Grok("%{ZIPCODE:zipcode}:%{EMAILADDRESS:email}");
_ = grokCustomLocal.Parse("06590:halil.i.kocaoz@gmail.com");
return grokCustomLocal.Parse("06590:halil.i.kocaoz@gmail.com");
}

[Benchmark]
public void LogLocal()
public GrokResult LogLocal()
{
Grok grokLogLocal = new Grok("%{MONTHDAY:month}-%{MONTHDAY:day}-%{MONTHDAY:year} %{TIME:timestamp};%{WORD:id};%{LOGLEVEL:loglevel};%{WORD:func};%{GREEDYDATA:msg}");
_ = grokLogLocal.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
return grokLogLocal.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
06-21-19 21:00:13:589265;156;WARN;main;DECODED: 775233900043 EMPTY DISTANCE: --------");
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/Grok.Net.Tests/UnitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using GrokNet;
using PCRE;
using Xunit;

namespace GrokNetTests
Expand Down Expand Up @@ -311,7 +311,7 @@ public void Parse_Multiline_String_As_A_Single_Line_With_Regex_Options_Specified
// Arrange
const string timeKeyword = "loggingTime";
const string messageKeyword = "message";
const RegexOptions options = RegexOptions.Singleline;
const PcreOptions options = PcreOptions.Singleline;

var multilineGrok = new Grok($"%{{TIMESTAMP_ISO8601:{timeKeyword}}} %{{GREEDYDATA:{messageKeyword}}}", options);

Expand All @@ -338,7 +338,7 @@ Second line
public void Load_Custom_Patterns_From_Stream_And_Parse_With_Regex_Options_Specified()
{
// Arrange
const RegexOptions options = RegexOptions.Singleline;
const PcreOptions options = PcreOptions.Singleline;
const string zipcode = "122001";
const string email = "Bob.Davis@microsoft.com";

Expand All @@ -356,7 +356,7 @@ public void Load_Custom_Patterns_From_Stream_And_Parse_With_Regex_Options_Specif
public void Load_Custom_Patterns_And_Parse_With_Regex_Options_Specified()
{
// Arrange
const RegexOptions options = RegexOptions.Singleline;
const PcreOptions options = PcreOptions.Singleline;
const string zipcode = "122001";
var customPatterns = new Dictionary<string, string>
{
Expand Down
3 changes: 2 additions & 1 deletion src/Grok.Net/Grok.Net.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
</PropertyGroup>

<PropertyGroup>
<Version>1.2.0</Version>
<Version>2.0.0</Version>
</PropertyGroup>

<ItemGroup>
Expand All @@ -32,6 +32,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="All" />
<PackageReference Include="PCRE.NET" Version="0.20.0" />
</ItemGroup>

<ItemGroup>
Expand Down
44 changes: 22 additions & 22 deletions src/Grok.Net/Grok.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using PCRE;

namespace GrokNet
{
Expand All @@ -13,14 +13,14 @@ public class Grok
private readonly string _grokPattern;
private readonly Dictionary<string, string> _patterns;
private readonly Dictionary<string, string> _typeMaps;
private Regex _compiledRegex;
private PcreRegex _compiledRegex;
private IReadOnlyList<string> _patternGroupNames;
private const RegexOptions _defaultRegexOptions = RegexOptions.Compiled | RegexOptions.ExplicitCapture;
private readonly RegexOptions _regexOptions;
private const PcreOptions _defaultRegexOptions = PcreOptions.Compiled | PcreOptions.ExplicitCapture;
private readonly PcreOptions _regexOptions;

private static readonly Regex _grokRegex = new Regex("%{(\\w+):(\\w+)(?::\\w+)?}", RegexOptions.Compiled);
private static readonly Regex _grokRegexWithType = new Regex("%{(\\w+):(\\w+):(\\w+)?}", RegexOptions.Compiled);
private static readonly Regex _grokWithoutName = new Regex("%{(\\w+)}", RegexOptions.Compiled);
private static readonly PcreRegex _grokRegex = new PcreRegex("%{(\\w+):(\\w+)(?::\\w+)?}", PcreOptions.Compiled);
private static readonly PcreRegex _grokRegexWithType = new PcreRegex("%{(\\w+):(\\w+):(\\w+)?}", PcreOptions.Compiled);
private static readonly PcreRegex _grokWithoutName = new PcreRegex("%{(\\w+)}", PcreOptions.Compiled);

/// <summary>
/// Initializes a new instance of the <see cref="Grok"/> class with the specified Grok pattern.
Expand All @@ -41,7 +41,7 @@ public Grok(string grokPattern)
/// </summary>
/// <param name="grokPattern">The Grok pattern to use.</param>
/// <param name="regexOptions">Additional regex options.</param>
public Grok(string grokPattern, RegexOptions regexOptions)
public Grok(string grokPattern, PcreOptions regexOptions)
: this(grokPattern)
{
_regexOptions = _defaultRegexOptions | regexOptions;
Expand All @@ -65,7 +65,7 @@ public Grok(string grokPattern, Stream customPatterns)
/// <param name="grokPattern">The Grok pattern to use.</param>
/// <param name="customPatterns">A stream containing custom patterns.</param>
/// <param name="regexOptions">Additional regex options.</param>
public Grok(string grokPattern, Stream customPatterns, RegexOptions regexOptions)
public Grok(string grokPattern, Stream customPatterns, PcreOptions regexOptions)
: this(grokPattern, regexOptions)
{
LoadCustomPatterns(customPatterns);
Expand All @@ -84,12 +84,12 @@ public Grok(string grokPattern, IDictionary<string, string> customPatterns)

/// <summary>
/// Initializes a new instance of the <see cref="Grok"/> class with the specified Grok pattern,
/// custom patterns if necessary, and custom <see cref="RegexOptions"/> .
/// custom patterns if necessary, and custom <see cref="PcreOptions"/> .
/// </summary>
/// <param name="grokPattern">The Grok pattern to use.</param>
/// <param name="customPatterns">Custom patterns to add.</param>
/// <param name="regexOptions">Additional regex options.</param>
public Grok(string grokPattern, IDictionary<string, string> customPatterns, RegexOptions regexOptions)
public Grok(string grokPattern, IDictionary<string, string> customPatterns, PcreOptions regexOptions)
: this(grokPattern, regexOptions)
{
LoadCustomPatterns(customPatterns);
Expand All @@ -109,7 +109,7 @@ public GrokResult Parse(string text)

var grokItems = new List<GrokItem>();

foreach (Match match in _compiledRegex.Matches(text))
foreach (PcreMatch match in _compiledRegex.Matches(text))
{
foreach (string groupName in _patternGroupNames)
{
Expand Down Expand Up @@ -157,14 +157,14 @@ private void ParsePattern()
pattern = newPattern;
} while (!done);

_compiledRegex = new Regex(pattern, _regexOptions);
_patternGroupNames = _compiledRegex.GetGroupNames().ToList();
_compiledRegex = new PcreRegex(pattern, _regexOptions);
_patternGroupNames = _compiledRegex.PatternInfo.GroupNames.ToList();
}

private void ProcessTypeMappings(ref string pattern)
{
MatchCollection matches = _grokRegexWithType.Matches(string.IsNullOrEmpty(pattern) ? _grokPattern : pattern);
foreach (Match match in matches)
IEnumerable<PcreMatch> matches = _grokRegexWithType.Matches(string.IsNullOrEmpty(pattern) ? _grokPattern : pattern);
foreach (PcreMatch match in matches)
{
_typeMaps.Add(match.Groups[2].Value, match.Groups[3].Value);
}
Expand Down Expand Up @@ -258,25 +258,25 @@ private static void EnsurePatternIsValid(string pattern)
{
try
{
_ = Regex.Match("", pattern);
_ = PcreRegex.Match("", pattern);
}
catch (Exception e)
{
throw new FormatException($"Invalid regular expression {pattern}", e);
}
}

private string ReplaceWithName(Match match)
private string ReplaceWithName(PcreMatch match)
{
Group group1 = match.Groups[2];
Group group2 = match.Groups[1];
PcreGroup group1 = match.Groups[2];
PcreGroup group2 = match.Groups[1];

return _patterns.TryGetValue(group2.Value, out var str) ? $"(?<{group1}>{str})" : $"(?<{group1}>)";
}

private string ReplaceWithoutName(Match match)
private string ReplaceWithoutName(PcreMatch match)
{
Group group = match.Groups[1];
PcreGroup group = match.Groups[1];

if (_patterns.TryGetValue(group.Value, out _))
{
Expand Down

0 comments on commit dc2c26f

Please sign in to comment.