Skip to content

Commit

Permalink
Merge 5a7cc0d into bef611b
Browse files Browse the repository at this point in the history
  • Loading branch information
longhotsummer committed Oct 26, 2020
2 parents bef611b + 5a7cc0d commit 1135214
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 4 deletions.
2 changes: 1 addition & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Changes
2.5 (unreleased)
----------------

- Nothing changed yet.
- Make it possible to adjust the attributes considered when comparing nodes.


2.4 (2019-10-09)
Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,7 @@ Contributors

* Albertas Agejevas, alga@shoobx.com

* Greg Kempe, greg@laws.africa

The diff algorithm is based on "`Change Detection in Hierarchically Structured Information <http://ilpubs.stanford.edu/115/1/1995-46.pdf>`_",
and the text diff uses Google's ``diff_match_patch`` algorithm.
21 changes: 21 additions & 0 deletions tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -1466,3 +1466,24 @@ def test_issue_21_default_namespaces(self):
right = '<tag xmlns="ns">new</tag>'
result = self._diff(left, right)
self.assertEqual(result[0].node, "/*[1]")

def test_ignore_attribute(self):
    # A Differ subclass can override node_attribs() to leave attributes out
    # of the comparison; here the 'skip' attribute is dropped.
    class SkipBlindDiffer(Differ):
        def node_attribs(self, node):
            if 'skip' not in node.attrib:
                return node.attrib
            kept = dict(node.attrib)
            del kept['skip']
            return kept

    xml_parser = etree.XMLParser(remove_blank_text=True)
    old_root = etree.fromstring(
        '<a><b foo="bar" skip="boom">text</b></a>', xml_parser
    )
    new_root = etree.fromstring(
        '<a><b foo="bar" skip="different">text</b></a>', xml_parser
    )

    skip_blind = SkipBlindDiffer()
    skip_blind.set_trees(old_root, new_root)

    # The two documents differ only in the value of 'skip', so with that
    # attribute excluded the edit script must be empty.
    self.assertEqual(list(skip_blind.diff()), [])
10 changes: 7 additions & 3 deletions xmldiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def node_text(self, node):
texts = node.xpath("text()")

# Then add attributes and values
for tag, value in sorted(node.attrib.items()):
for tag, value in sorted(self.node_attribs(node).items()):
if tag[0] == "{":
tag = tag.split(
"}",
Expand All @@ -199,6 +199,10 @@ def node_text(self, node):
self._text_cache[node] = result
return result

def node_attribs(self, node):
    """Return the attribute mapping to take into account for ``node``.

    The default implementation hands back ``node.attrib`` unchanged.
    Subclasses may override this hook to return a filtered mapping when
    some attributes should be left out of the comparison.
    """
    considered = node.attrib
    return considered

def leaf_ratio(self, left, right):
# How similar two nodes are, with no consideration of their children
# We use a simple ratio here, I tried Levenshtein distances
Expand Down Expand Up @@ -237,8 +241,8 @@ def update_node_attr(self, left, right):

# Update: Look for differences in attributes

left_keys = set(left.attrib.keys())
right_keys = set(right.attrib.keys())
left_keys = set(self.node_attribs(left).keys())
right_keys = set(self.node_attribs(right).keys())
new_keys = right_keys.difference(left_keys)
removed_keys = left_keys.difference(right_keys)
common_keys = left_keys.intersection(right_keys)
Expand Down

0 comments on commit 1135214

Please sign in to comment.