Skip to content

Commit

Permalink
treat missing rows as a mismatch
Browse files Browse the repository at this point in the history
Throwing `KeyNotFound` is not helpful for larger analysis.

Had to loosen a lot of non-null strings because we won't have any context from the "right" file, so we need to pass some more nulls around when building output.
  • Loading branch information
ryepup committed Apr 27, 2021
1 parent c872e42 commit 163f227
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 6 deletions.
50 changes: 50 additions & 0 deletions src/XlsxCompare.Tests/XlsxComparerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,55 @@ public void Compare_SomeErrors_ReturnsCorrectResult()
Assert.AreEqual(expectedContext[id], newId);
}
}

// Joins "id" (left) to "new_id" (right), asserts Email against EML, and requests
// the left "Name" column as context. Expects three cell mismatches and three row
// mismatches, each carrying the "Name" context that belongs to its key.
// NOTE(review): depends on the left.xlsx / right.xlsx fixtures; per the test name,
// presumably none of the left keys join to a right row - confirm against the files.
[TestMethod]
public void Compare_NoJoinMatchesWithContext_ReturnsMissingDataAsMismatch()
{
    // Arrange
    var emailAssertions = new[] { new Assertion("Email", "EML") };
    var resultOptions = new ResultOptions(LeftColumnNames: new[] { "Name" });
    var options = new CompareOptions(
        LeftKeyColumn: "id",
        RightKeyColumn: "new_id",
        emailAssertions,
        resultOptions);
    var expectedNameByKey = new Dictionary<string, string>
    {
        ["1"] = "leading trailing spaces",
        ["2"] = "null email",
        ["3"] = "whitespace address",
    };

    // Act
    var outcome = _comparer.Compare("left.xlsx", "right.xlsx", options);

    // Assert
    Assert.AreEqual(3, outcome.TotalCellMismatches);
    Assert.AreEqual(3, outcome.TotalRowMismatches);

    foreach (var reported in outcome.Mismatches)
    {
        // Every reported mismatch must carry the left-side "Name" for its key.
        var rowKey = reported.Key;
        var actualName = reported.Context["Name"];
        var expectedName = expectedNameByKey[rowKey];
        Assert.AreEqual(expectedName, actualName);
    }
}

// Same join ("id" to "new_id") and Email/EML assertion as the context test, but
// with IgnoreMissingRows enabled and default result options. Expects zero cell
// mismatches and zero row mismatches.
// NOTE(review): depends on the left.xlsx / right.xlsx fixtures; per the test name,
// presumably none of the left keys join to a right row - confirm against the files.
[TestMethod]
public void Compare_NoJoinMatchesWithIgnoringMissingRows_ReturnsNoMismatches()
{
    // Arrange
    var emailAssertions = new[] { new Assertion("Email", "EML") };
    var defaultResultOptions = new ResultOptions();
    var options = new CompareOptions(
        LeftKeyColumn: "id",
        RightKeyColumn: "new_id",
        Assertions: emailAssertions,
        ResultOptions: defaultResultOptions,
        IgnoreMissingRows: true);

    // Act
    var outcome = _comparer.Compare("left.xlsx", "right.xlsx", options);

    // Assert
    Assert.AreEqual(0, outcome.TotalCellMismatches);
    Assert.AreEqual(0, outcome.TotalRowMismatches);
}
}
}
2 changes: 1 addition & 1 deletion src/XlsxCompare/Mismatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ record Mismatch(
Assertion Assertion,
string Key,
string LeftValue,
string RightValue,
string? RightValue,
IReadOnlyDictionary<string, string> Context
);
}
6 changes: 3 additions & 3 deletions src/XlsxCompare/ResultsWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,19 @@ private static IEnumerable<string> GetHeaders(ResultOptions opts)
yield return opts.RightValueHeader;
}

// Lazily yields the cell values for one mismatch row, in order: one value per
// configured context column, then the assertion's left column name, the left
// value, and the right value.
// NOTE(review): this hunk shows both the pre-change and post-change form of the
// signature and of the context lookup; the nullable (string?) variants are the
// ones introduced by this commit.
private static IEnumerable<string> GetValues(ResultOptions opts, Mismatch mismatch)
private static IEnumerable<string?> GetValues(ResultOptions opts, Mismatch mismatch)
{
foreach (var col in opts.ContextColumnNames)
{
// Indexer lookup: throws KeyNotFoundException when the context has no entry for col.
yield return mismatch.Context[col];
// GetValueOrDefault: yields null instead of throwing when col is absent from the context.
yield return mismatch.Context.GetValueOrDefault(col);
}
yield return mismatch.Assertion.LeftColumnName;
// TODO: maybe format?
yield return mismatch.LeftValue;
// RightValue is declared string? on Mismatch, so this element may be null.
yield return mismatch.RightValue;
}

private static void WriteRow(ExcelWorksheet sheet, int row, IEnumerable<string> values)
private static void WriteRow(ExcelWorksheet sheet, int row, IEnumerable<string?> values)
{
var col = 1;
foreach (var value in values)
Expand Down
12 changes: 10 additions & 2 deletions src/XlsxCompare/XlsxComparer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,22 @@ public CompareResult Compare(string leftPath, string rightPath, CompareOptions o
private IEnumerable<Mismatch> Compare(XlsxFacade left, XlsxFacade right, CompareOptions opts)
{
_logger.LogInformation("Comparing {LeftXlsx} to {RightXlsx}", left, right);
var joinAssertion = new Assertion(opts.LeftKeyColumn, opts.RightKeyColumn);
foreach (var leftRow in left.Rows)
{
var key = left.GetSafeValue(leftRow, opts.LeftKeyColumn);
var leftContext = new Dictionary<string, string>(GetContext(left, leftRow, opts.ResultOptions.LeftColumnNames));
if (!right.TryFindRow(opts.RightKeyColumn, key, out var rightRow))
{
if (opts.IgnoreMissingRows) { continue; }
throw new KeyNotFoundException($"Could not find '{key}' in {opts.RightKeyColumn}");
yield return new Mismatch(
Assertion: joinAssertion,
Key: key,
LeftValue: key,
RightValue: null,
Context: leftContext
);
continue;
}

foreach (var assertion in opts.Assertions)
Expand All @@ -45,7 +54,6 @@ private IEnumerable<Mismatch> Compare(XlsxFacade left, XlsxFacade right, Compare

if (!assertion.IsMatch(leftValue, rightValue))
{
var leftContext = GetContext(left, leftRow, opts.ResultOptions.LeftColumnNames);
var rightContext = GetContext(right, rightRow, opts.ResultOptions.RightColumnNames);
var context = new Dictionary<string, string>(
leftContext.Concat(rightContext));
Expand Down

0 comments on commit 163f227

Please sign in to comment.