Skip to content

Commit

Permalink
Merge 3b82974 into c33271d
Browse files Browse the repository at this point in the history
  • Loading branch information
regebro committed Jun 26, 2018
2 parents c33271d + 3b82974 commit 312740e
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 82 deletions.
6 changes: 5 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ CHANGES
1.1.2 (unreleased)
------------------

- Nothing changed yet.
- When I fixed the xpath namespace handling I also changed the tag names to
an xpath syntax. This was unhelpful, so I changed that back. To solve this
I have had to extend the return format from the parser and add an N_NSPREFIX
item that contains the prefix. This is used by the differ to return correct
xpaths without changing the tags.


1.1.1 (2018-06-20)
Expand Down
2 changes: 1 addition & 1 deletion src/xmldiff/fmes.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def _before_attribute(self, parent_node, attr_node, new_name=None):
return attr_name

FAKE_TAG = [NT_NODE, 'LogilabXMLDIFFFAKETag', 'LogilabXMLDIFFFAKETag',
[], None, 0, 0, True, False]
[], None, 0, 0, None, True, False]

def _before_insert_text(self, parent, new_text, k):
""" check if a text node that will be remove has two sibbling text
Expand Down
23 changes: 18 additions & 5 deletions src/xmldiff/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
N_PARENT = 4 # node's parent
N_ISSUE = 5 # node's total issue number
N_XNUM = 6 # to compute node's xpath
NSIZE = 7 # number of items in a list which represent a node
N_NSPREFIX = 7 # node's namespace prefix (if any)
NSIZE = 8 # number of items in a list which represent a node

# NODE TYPES
# NT_SYST = 0 # SYSTEM node (added by parser) /!\ deprecated
Expand Down Expand Up @@ -120,14 +121,26 @@ def caract(node):

def f_xpath(node, x=''):
""" compute node's xpath """
if node[N_NAME] != '/':
name = node[N_NAME]
if '{' in name:
# We have a namespace
pre, rest = name.split('{', 1)
uri, local_name = rest.split('}', 1)
prefix = node[N_NSPREFIX]
if prefix is None:
# Default namespace
name = pre + local_name
else:
name = '%s%s:%s' % (pre, prefix, local_name)

if name != '/':
if node[N_TYPE] == NT_ATTN:
return f_xpath(node[N_PARENT],
'/%s' % node[N_NAME][:len(node[N_NAME]) - 4])
'/%s' % name[:len(name) - 4])
if node[N_TYPE] == NT_ATTV:
return f_xpath(node[N_PARENT]) # [N_PARENT], '/%s'%node[N_NAME])
return f_xpath(node[N_PARENT]) # [N_PARENT], '/%s'%name)
return f_xpath(node[N_PARENT], '/%s[%d]%s' % (
node[N_NAME], node[N_XNUM], x))
name, node[N_XNUM], x))
elif not x:
return '/'
return x
Expand Down
64 changes: 42 additions & 22 deletions src/xmldiff/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class SaxHandler(ContentHandler):
"""

def __init__(self, normalize_space, include_comment):
self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0]]
self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0, None]]
self._norm_sp = normalize_space or None
self._incl_comm = include_comment or None
self._xpath = ''
Expand All @@ -70,40 +70,59 @@ def endPrefixMapping(self, prefix):

def _buildTag(self, ns_name_tuple):
ns_uri, local_name = ns_name_tuple

if ns_uri and ns_uri != self._default_ns:
ns_name = [x[0] for x in self._ns_mapping.items()
if ns_uri in x[1]][0]
return "%s:%s" % (ns_name, local_name)

if ns_uri:
el_tag = "{%s}%s" % ns_name_tuple
else:
el_tag = local_name
return el_tag

def _getPrefix(self, ns_uri):
if not ns_uri:
return None
for (prefix, uri) in self._ns_mapping.items():
if ns_uri in uri:
return prefix
if ns_uri == 'http://www.w3.org/XML/1998/namespace':
# It's the xml: namespace, undeclared.
return 'xml'
raise ValueError("No prefix found for namespace URI %s" % ns_uri)

# Don't know if I need this
def _buildXPath(self, ns_name_tuple):
ns_uri, local_name = ns_name_tuple
if ns_uri:
prefix = self._getPrefix(ns_uri)
return '%s:%s' % (prefix, local_name)
return local_name

## method of the ContentHandler interface #################################
def startElementNS(self, name, qname, attributes):
if attributes:
attributes = dict(
[(self._buildTag(k), v) for k, v in attributes.items()])
self.startElement(self._buildTag(name), attributes)

def startElement(self, name, attrs):
self.startElementNS((None, name), None, attrs)

def startElementNS(self, name, qname, attrs):
tagName = self._buildTag(name)
prefix = self._getPrefix(name[0])

# process xpath
self._xpath = "%s%s%s" % (self._xpath, '/', name)
_inc_xpath(self._h, self._xpath)
# nodes construction for element
node = [NT_NODE, name, name, [], None, self._n_elmt + 1,
self._h[self._xpath]]
node = [NT_NODE, tagName, tagName, [], None, self._n_elmt + 1,
self._h[self._xpath], prefix]
self._n_elmt += 1
self._xpath = "%s%s%s%s" % (
self._xpath, '[', self._h[self._xpath], ']')
# nodes construction for element's attributes
# sort attributes to avoid further moves
for key in sorted(attrs.keys()):
for key, value in sorted(attrs.items()):
self._n_elmt += 2
attr_node = [NT_ATTN, '@%sName' % key, key, [], None, 1, 0]
attrName = self._buildTag(key)
prefix = self._getPrefix(key[0])
attr_node = [NT_ATTN, '@%sName' % attrName, attrName, [], None,
1, 0, prefix]
link_node(node, attr_node)
link_node(attr_node, [NT_ATTV, '@%s' % key,
attrs.get(key, ''),
[], None, 0, 0])
link_node(attr_node, [NT_ATTV, '@%s' % attrName, value,
[], None, 0, 0, prefix])

link_node(self._p_stack[-1], node)
# set current element on the top of the father stack
Expand Down Expand Up @@ -138,7 +157,8 @@ def characters(self, ch):
xpath = '%s/text()' % self._xpath
_inc_xpath(self._h, xpath)
# nodes construction for text
node = [NT_TEXT, 'text()', ch, [], None, 0, self._h[xpath]]
node = [NT_TEXT, 'text()', ch, [], None, 0,
self._h[xpath], None]
link_node(parent, node)

## method of the LexicalHandler interface #################################
Expand All @@ -153,7 +173,7 @@ def comment(self, content):
_inc_xpath(self._h, xpath)
# nodes construction for comment
node = [NT_COMM, 'comment()', content, [], None,
0, self._h[xpath]]
0, self._h[xpath], None]
link_node(self._p_stack[-1], node)

# methods from xml.sax.saxlib.LexicalHandler (avoid dependency on pyxml)
Expand Down
111 changes: 64 additions & 47 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from xmldiff.input import tree_from_stream
from xmldiff.input import tree_from_lxml

from xmldiff.objects import N_VALUE, N_CHILDS, N_PARENT
from xmldiff.objects import N_VALUE, N_CHILDS, N_PARENT, N_NSPREFIX


HERE = os.path.dirname(__file__)
Expand Down Expand Up @@ -63,8 +63,8 @@ def test_tree_from_stream_simple():
[[1,
u'a',
u'a',
[[1, u'b', u'b', [], mock.ANY, 0, 1],
[1, u'c', u'c', [], mock.ANY, 0, 1],
[[1, u'b', u'b', [], mock.ANY, 0, 1, None],
[1, u'c', u'c', [], mock.ANY, 0, 1, None],
[1,
u'd',
u'd',
Expand All @@ -77,26 +77,32 @@ def test_tree_from_stream_simple():
[],
mock.ANY,
0,
1]],
1,
None]],
mock.ANY,
1,
1],
1,
None],
[1,
u'f',
u'f',
[],
mock.ANY,
0,
1]],
1,
None]],
mock.ANY,
3,
1]],
1,
None]],
mock.ANY,
6,
1]],
1,
None]],
None,
7,
0]
0,
None]
assert tree == expected


Expand Down Expand Up @@ -150,74 +156,85 @@ def test_tree_from_stream_with_namespace():
'/',
'',
[[1,
u'sec:section',
u'sec:section',
u'{urn:corp:sec}section',
u'{urn:corp:sec}section',
[[1,
u'sec:sectionInfo',
u'sec:sectionInfo',
u'{urn:corp:sec}sectionInfo',
u'{urn:corp:sec}sectionInfo',
[[1,
u'sec:secID',
u'sec:secID',
[[4, 'text()', u'S001', [], None, 0, 1]],
u'{urn:corp:sec}secID',
u'{urn:corp:sec}secID',
[[4, 'text()', u'S001', [], None, 0, 1, None]],
None,
1,
1],
1,
'sec'],
[1,
u'sec:name',
u'sec:name',
[[4, 'text()', u'Sales', [], None, 0, 1]],
u'{urn:corp:sec}name',
u'{urn:corp:sec}name',
[[4, 'text()', u'Sales', [], None, 0, 1, None]],
None,
1,
1]],
1,
'sec']],
None,
4,
1],
1,
'sec'],
[1,
u'sec:sectionInfo',
u'sec:sectionInfo',
u'{urn:corp:sec}sectionInfo',
u'{urn:corp:sec}sectionInfo',
[[2,
u'@nameName',
u'name',
[[3, u'@name', u'Development', [], None, 0, 0]],
[[3, u'@name', u'Development', [], None, 0, 0, None]],
None,
1,
0],
0,
None],
[2,
u'@secIDName',
u'secID',
[[3, u'@secID', u'S002', [], None, 0, 0]],
[[3, u'@secID', u'S002', [], None, 0, 0, None]],
None,
1,
0]],
0,
None]],
None,
4,
2],
2,
'sec'],
[1,
u'sec:sectionInfo',
u'sec:sectionInfo',
u'{urn:corp:sec}sectionInfo',
u'{urn:corp:sec}sectionInfo',
[[2,
u'@sec:nameName',
u'sec:name',
[[3, u'@sec:name', u'Gardening', [], None, 0, 0]],
u'@{urn:corp:sec}nameName',
u'{urn:corp:sec}name',
[[3, u'@{urn:corp:sec}name', u'Gardening', [], None, 0, 0, 'sec']],
None,
1,
0],
0,
'sec'],
[2,
u'@sec:secIDName',
u'sec:secID',
[[3, u'@sec:secID', u'S003', [], None, 0, 0]],
u'@{urn:corp:sec}secIDName',
u'{urn:corp:sec}secID',
[[3, u'@{urn:corp:sec}secID', u'S003', [], None, 0, 0, 'sec']],
None,
1,
0]],
0,
'sec']],
None,
4,
3]],
3,
'sec']],
None,
15,
1]],
1,
'sec']],
None,
16,
0]
0,
None]

assert tree == expected

Expand Down Expand Up @@ -246,8 +263,8 @@ def test_tree_from_lxml():
# This is only to fix this test, using xmldiff with these versions of
# lxml will still work, but the prefixes will be wrong.
def fix_lxml_421_tree(t, prefix):
t[1] = t[1].replace('ns00:', prefix)
t[2] = t[2].replace('ns00:', prefix)
if t[N_NSPREFIX] == 'ns00':
t[N_NSPREFIX] = prefix
for subtree in t[3]:
fix_lxml_421_tree(subtree, prefix)

Expand All @@ -264,7 +281,7 @@ def test_tree_from_lxml_with_namespace():
_nuke_parent(tree_stream)

# lxml <= 4.2.1
fix_lxml_421_tree(tree, 'sec:')
fix_lxml_421_tree(tree, 'sec')

assert tree == tree_stream

Expand All @@ -279,7 +296,7 @@ def test_tree_from_lxml_with_namespace():
_nuke_parent(tree_stream)

# lxml <= 4.2.1
fix_lxml_421_tree(tree, 'z:')
fix_lxml_421_tree(tree, 'z')

assert tree == tree_stream

Expand All @@ -295,7 +312,7 @@ def test_tree_from_lxml_with_default_namespace():
_nuke_parent(tree)
_nuke_parent(tree_stream)

fix_lxml_421_tree(tree, '')
fix_lxml_421_tree(tree, None)

assert tree == tree_stream

Expand Down
Loading

0 comments on commit 312740e

Please sign in to comment.