Skip to content

Commit

Permalink
support matching by unordered tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
ryepup committed Apr 23, 2021
1 parent 4bed2a7 commit 8fedf0b
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ This config will:
|`date`|parse as dates before comparison|`2021-04-02` matches `20210402` and `4/2/2021 3:45PM`, but not `2021-04-03`|
|`stringLeftStartsWithRight`|the left value must start with the right value|`testing` matches `test`, but not `testing with suffix`|
|`stringRightStartsWithLeft`|the right value must start with the left value|`test` matches `testing`|
|`tokens`|both values must contain the same set of tokens after splitting on whitespace; tokens are compared case-insensitively and their order is ignored. This is useful for addresses that have the same components in a different order, or other data where the ordering is insignificant|`1234 S Main St` matches `1234 Main St S`|

## Developing

Expand Down
5 changes: 5 additions & 0 deletions src/XlsxCompare.Tests/MatchByTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ public class MatchByTests
[DataRow(MatchBy.Decimal, "0.00", "0")]
[DataRow(MatchBy.StringRightStartsWithLeft, "asdf", "asdf and then some")]
[DataRow(MatchBy.StringRightStartsWithLeft, "", "")]
[DataRow(MatchBy.Tokens, "X Y", "Y X")]
[DataRow(MatchBy.Tokens, "X Y Z", " Z X Y")]
[DataRow(MatchBy.Tokens, "", "")]
public void IsMatch_ThingsThatMatch_ReturnsTrue(MatchBy? match, string left, string right)
{
Assert.IsTrue(match.IsMatch(left, right));
Expand All @@ -39,6 +42,8 @@ public void IsMatch_ThingsThatMatch_ReturnsTrue(MatchBy? match, string left, str
[DataRow(MatchBy.Decimal, "0.084400", "0.0845")]
[DataRow(MatchBy.StringRightStartsWithLeft, "", "asdf and then some")]
[DataRow(MatchBy.StringRightStartsWithLeft, "asdf but no", "asdf and then some")]
[DataRow(MatchBy.Tokens, "X Y", "Y Z")]
[DataRow(MatchBy.Tokens, "X", " ")]
public void IsMatch_ThingsThatDoNotMatch_ReturnsFalse(MatchBy? match, string left, string right)
{
Assert.IsFalse(match.IsMatch(left, right));
Expand Down
13 changes: 13 additions & 0 deletions src/XlsxCompare/MatchBy.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace XlsxCompare
{
Expand All @@ -12,6 +14,7 @@ public enum MatchBy
Date,
StringLeftStartsWithRight,
StringRightStartsWithLeft,
Tokens,
}

static class MatchByExtensions
Expand All @@ -25,6 +28,7 @@ public static bool IsMatch(this MatchBy? match, string left, string right)
MatchBy.StringLeftStartsWithRight => IsLeftStartsWithRightMatch(left, right),
MatchBy.StringRightStartsWithLeft => IsLeftStartsWithRightMatch(right, left),
MatchBy.Decimal => IsDecimalMatch(left, right),
MatchBy.Tokens => IsTokenMatch(left, right),
_ => IsStringMatch(left, right),
};

Expand Down Expand Up @@ -56,5 +60,14 @@ private static bool IsDecimalMatch(string left, string right)
|| (decimal.TryParse(left, out var leftInt)
&& decimal.TryParse(right, out var rightInt)
&& leftInt == rightInt);

/// <summary>
/// Determines whether two strings are made up of the same whitespace-separated
/// tokens, ignoring token order, letter case, and repeated tokens.
/// </summary>
/// <param name="left">The left-hand value to compare.</param>
/// <param name="right">The right-hand value to compare.</param>
/// <returns>
/// <c>true</c> when every token of <paramref name="left"/> appears in
/// <paramref name="right"/> and vice versa; otherwise <c>false</c>.
/// </returns>
private static bool IsTokenMatch(string left, string right)
    => IsTokenMatch(Tokenize(left), Tokenize(right));

/// <summary>
/// Compares two token sequences as case-insensitive sets.
/// </summary>
/// <param name="left">Tokens from the left-hand value.</param>
/// <param name="right">Tokens from the right-hand value.</param>
/// <returns><c>true</c> when both sequences contain exactly the same distinct tokens.</returns>
private static bool IsTokenMatch(IEnumerable<string> left, IEnumerable<string> right)
    // Set equality is symmetric: a one-directional Except() would wrongly report
    // "X" as matching "X Y" because it only checks that left is a subset of right.
    => new HashSet<string>(left, StringComparer.OrdinalIgnoreCase).SetEquals(right);

/// <summary>
/// Splits a value into its non-empty tokens, using any whitespace as the separator.
/// </summary>
/// <param name="value">The raw value to split.</param>
/// <returns>The tokens in their original order; empty when the value is blank.</returns>
private static IEnumerable<string> Tokenize(string value)
    // An empty separator array makes Split treat every white-space character as a
    // delimiter; RemoveEmptyEntries drops the blanks produced by leading, trailing,
    // or repeated whitespace so they never count as tokens.
    => value.Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
}
}

0 comments on commit 8fedf0b

Please sign in to comment.