From c178ed2106e0e6943eed8f39b7d91e12db09e6fb Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Fri, 23 Oct 2020 09:53:56 +0200 Subject: [PATCH 1/3] Make it possible to adjust the attributes used when comparing nodes. Subclasses can use this to determine which attributes for a node should be considered when diffing. By default, they are all considered. --- CHANGES.rst | 2 +- tests/test_diff.py | 21 +++++++++++++++++++++ xmldiff/diff.py | 11 ++++++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b56bebd..5d79429 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changes 2.5 (unreleased) ---------------- -- Nothing changed yet. +- Make it possible to adjust the attributes considered when comparing nodes. 2.4 (2019-10-09) diff --git a/tests/test_diff.py b/tests/test_diff.py index 643e941..07155a4 100644 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -1458,3 +1458,24 @@ def test_issue_21_default_namespaces(self): right = 'new' result = self._diff(left, right) self.assertEqual(result[0].node, "/*[1]") + + def test_ignore_attribute(self): + # this differ ignores the attribute 'skip' when diffing + class IgnoringDiffer(Differ): + def node_attribs(self, node): + if 'skip' in node.attrib: + attribs = dict(node.attrib) + del attribs['skip'] + return attribs + return node.attrib + + left = 'text' + right = 'text' + + parser = etree.XMLParser(remove_blank_text=True) + left_tree = etree.fromstring(left, parser) + right_tree = etree.fromstring(right, parser) + differ = IgnoringDiffer() + differ.set_trees(left_tree, right_tree) + editscript = list(differ.diff()) + self.assertEqual(editscript, []) diff --git a/xmldiff/diff.py b/xmldiff/diff.py index 5648c26..a30a3fd 100644 --- a/xmldiff/diff.py +++ b/xmldiff/diff.py @@ -186,7 +186,7 @@ def node_text(self, node): texts = node.xpath("text()") # Then add attributes and values - for tag, value in sorted(node.attrib.items()): + for tag, value in 
sorted(self.node_attribs(node).items()): if tag[0] == "{": tag = tag.split("}",)[-1] texts.append(f"{tag}:{value}") @@ -197,6 +197,11 @@ def node_text(self, node): self._text_cache[node] = result return result + def node_attribs(self, node): + """ Return a dict of attributes to consider for this node. + """ + return node.attrib + def leaf_ratio(self, left, right): # How similar two nodes are, with no consideration of their children # We use a simple ratio here, I tried Levenshtein distances @@ -235,8 +240,8 @@ def update_node_attr(self, left, right): # Update: Look for differences in attributes - left_keys = set(left.attrib.keys()) - right_keys = set(right.attrib.keys()) + left_keys = set(self.node_attribs(left).keys()) + right_keys = set(self.node_attribs(right).keys()) new_keys = right_keys.difference(left_keys) removed_keys = left_keys.difference(right_keys) common_keys = left_keys.intersection(right_keys) From 155a3bcbfd5d8b191366afc1eec4e08489352eee Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Fri, 23 Oct 2020 10:22:10 +0200 Subject: [PATCH 2/3] Make black happy for contributed code --- xmldiff/diff.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xmldiff/diff.py b/xmldiff/diff.py index a30a3fd..a9b33a7 100644 --- a/xmldiff/diff.py +++ b/xmldiff/diff.py @@ -198,8 +198,7 @@ def node_text(self, node): return result def node_attribs(self, node): - """ Return a dict of attributes to consider for this node. 
- """ + """Return a dict of attributes to consider for this node.""" return node.attrib def leaf_ratio(self, left, right): From 5a7cc0d44e60c698e1f6cddf3f56e564d38c3e7b Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Mon, 26 Oct 2020 16:29:00 +0200 Subject: [PATCH 3/3] Update contributors list --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 5395a9d..df9203a 100644 --- a/README.rst +++ b/README.rst @@ -84,5 +84,7 @@ Contributors * Albertas Agejevas, alga@shoobx.com + * Greg Kempe, greg@laws.africa + The diff algorithm is based on "`Change Detection in Hierarchically Structured Information `_", and the text diff is using Google's ``diff_match_patch`` algorithm.