From 163f22757444b77540902be442aa1a7befa8ee82 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Tue, 27 Apr 2021 14:41:49 -0400 Subject: [PATCH] treat missing rows as a mismatch Throwing `KeyNotFoundException` is not helpful for larger analysis. Had to loosen a lot of non-null strings because we won't have any context from the "right" file, so need to pass some more nulls around when building output. --- src/XlsxCompare.Tests/XlsxComparerTests.cs | 50 ++++++++++++++++++++++ src/XlsxCompare/Mismatch.cs | 2 +- src/XlsxCompare/ResultsWriter.cs | 6 +-- src/XlsxCompare/XlsxComparer.cs | 12 +++++- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/src/XlsxCompare.Tests/XlsxComparerTests.cs b/src/XlsxCompare.Tests/XlsxComparerTests.cs index 3d3c076..1ee2e1e 100644 --- a/src/XlsxCompare.Tests/XlsxComparerTests.cs +++ b/src/XlsxCompare.Tests/XlsxComparerTests.cs @@ -61,5 +61,55 @@ public void Compare_SomeErrors_ReturnsCorrectResult() Assert.AreEqual(expectedContext[id], newId); } } + + [TestMethod] + public void Compare_NoJoinMatchesWithContext_ReturnsMissingDataAsMismatch() + { + var opts = new CompareOptions( + LeftKeyColumn: "id", + RightKeyColumn: "new_id", + new[]{ + new Assertion("Email", "EML") + }, + new ResultOptions( + LeftColumnNames: new[] { "Name" } + ) + ); + var results = _comparer.Compare("left.xlsx", "right.xlsx", opts); + + Assert.AreEqual(3, results.TotalCellMismatches); + Assert.AreEqual(3, results.TotalRowMismatches); + + var expectedContext = new Dictionary<string, string>(){ + {"1", "leading trailing spaces"}, + {"2", "null email"}, + {"3", "whitespace address"} + }; + + foreach (var mismatch in results.Mismatches) + { + var id = mismatch.Key; + var name = mismatch.Context["Name"]; + Assert.AreEqual(expectedContext[id], name); + } + } + + [TestMethod] + public void Compare_NoJoinMatchesWithIgnoringMissingRows_ReturnsNoMismatches() + { + var opts = new CompareOptions( + LeftKeyColumn: "id", + RightKeyColumn: "new_id", + Assertions: new[]{ + new Assertion("Email",
"EML") + }, + ResultOptions: new ResultOptions(), + IgnoreMissingRows: true + ); + var results = _comparer.Compare("left.xlsx", "right.xlsx", opts); + + Assert.AreEqual(0, results.TotalCellMismatches); + Assert.AreEqual(0, results.TotalRowMismatches); + } } } diff --git a/src/XlsxCompare/Mismatch.cs b/src/XlsxCompare/Mismatch.cs index bef27e8..fee887d 100644 --- a/src/XlsxCompare/Mismatch.cs +++ b/src/XlsxCompare/Mismatch.cs @@ -6,7 +6,7 @@ record Mismatch( Assertion Assertion, string Key, string LeftValue, - string RightValue, + string? RightValue, IReadOnlyDictionary<string, string> Context ); } diff --git a/src/XlsxCompare/ResultsWriter.cs b/src/XlsxCompare/ResultsWriter.cs index a11a595..024d8ef 100644 --- a/src/XlsxCompare/ResultsWriter.cs +++ b/src/XlsxCompare/ResultsWriter.cs @@ -57,11 +57,11 @@ private static IEnumerable<string> GetHeaders(ResultOptions opts) yield return opts.RightValueHeader; } - private static IEnumerable<string> GetValues(ResultOptions opts, Mismatch mismatch) + private static IEnumerable<string?> GetValues(ResultOptions opts, Mismatch mismatch) { foreach (var col in opts.ContextColumnNames) { - yield return mismatch.Context[col]; + yield return mismatch.Context.GetValueOrDefault(col); } yield return mismatch.Assertion.LeftColumnName; // TODO: maybe format? 
@@ -69,7 +69,7 @@ private static IEnumerable<string> GetValues(ResultOptions opts, Mismatch mismat yield return mismatch.RightValue; } - private static void WriteRow(ExcelWorksheet sheet, int row, IEnumerable<string> values) + private static void WriteRow(ExcelWorksheet sheet, int row, IEnumerable<string?> values) { var col = 1; foreach (var value in values) diff --git a/src/XlsxCompare/XlsxComparer.cs b/src/XlsxCompare/XlsxComparer.cs index 1e4acf0..a270322 100644 --- a/src/XlsxCompare/XlsxComparer.cs +++ b/src/XlsxCompare/XlsxComparer.cs @@ -29,13 +29,22 @@ public CompareResult Compare(string leftPath, string rightPath, CompareOptions o private IEnumerable<Mismatch> Compare(XlsxFacade left, XlsxFacade right, CompareOptions opts) { _logger.LogInformation("Comparing {LeftXlsx} to {RightXlsx}", left, right); + var joinAssertion = new Assertion(opts.LeftKeyColumn, opts.RightKeyColumn); foreach (var leftRow in left.Rows) { var key = left.GetSafeValue(leftRow, opts.LeftKeyColumn); + var leftContext = new Dictionary<string, string>(GetContext(left, leftRow, opts.ResultOptions.LeftColumnNames)); if (!right.TryFindRow(opts.RightKeyColumn, key, out var rightRow)) { if (opts.IgnoreMissingRows) { continue; } - throw new KeyNotFoundException($"Could not find '{key}' in {opts.RightKeyColumn}"); + yield return new Mismatch( + Assertion: joinAssertion, + Key: key, + LeftValue: key, + RightValue: null, + Context: leftContext + ); + continue; } foreach (var assertion in opts.Assertions) @@ -45,7 +54,6 @@ private IEnumerable<Mismatch> Compare(XlsxFacade left, XlsxFacade right, Compare if (!assertion.IsMatch(leftValue, rightValue)) { - var leftContext = GetContext(left, leftRow, opts.ResultOptions.LeftColumnNames); var rightContext = GetContext(right, rightRow, opts.ResultOptions.RightColumnNames); var context = new Dictionary<string, string>( leftContext.Concat(rightContext));