Skip to content

Commit

Permalink
Once again fix the namespace handling
Browse files Browse the repository at this point in the history
The original fix fixed too much. This fix does the right thing, but changes
the return format of the parser by adding a field for the namespace prefix.
  • Loading branch information
regebro committed Jun 26, 2018
1 parent c33271d commit 3b82974
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 82 deletions.
6 changes: 5 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ CHANGES
1.1.2 (unreleased)
------------------

- Nothing changed yet.
- When I fixed the xpath namespace handling I also changed the tag names to
an xpath syntax. This was unhelpful, so I changed that back. To solve this
I have had to extend the return format from the parser and add an N_NSPREFIX
field that contains the prefix. This is used by the differ to return correct
xpaths without changing the tags.


1.1.1 (2018-06-20)
Expand Down
2 changes: 1 addition & 1 deletion src/xmldiff/fmes.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def _before_attribute(self, parent_node, attr_node, new_name=None):
return attr_name

FAKE_TAG = [NT_NODE, 'LogilabXMLDIFFFAKETag', 'LogilabXMLDIFFFAKETag',
[], None, 0, 0, True, False]
[], None, 0, 0, None, True, False]

def _before_insert_text(self, parent, new_text, k):
""" check if a text node that will be removed has two sibling text
Expand Down
23 changes: 18 additions & 5 deletions src/xmldiff/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
N_PARENT = 4 # node's parent
N_ISSUE = 5 # node's total issue number
N_XNUM = 6 # to compute node's xpath
NSIZE = 7 # number of items in a list which represent a node
N_NSPREFIX = 7 # node's namespace prefix (if any)
NSIZE = 8 # number of items in a list which represent a node

# NODE TYPES
# NT_SYST = 0 # SYSTEM node (added by parser) /!\ deprecated
Expand Down Expand Up @@ -120,14 +121,26 @@ def caract(node):

def f_xpath(node, x=''):
""" compute node's xpath """
if node[N_NAME] != '/':
name = node[N_NAME]
if '{' in name:
# We have a namespace
pre, rest = name.split('{', 1)
uri, local_name = rest.split('}', 1)
prefix = node[N_NSPREFIX]
if prefix is None:
# Default namespace
name = pre + local_name
else:
name = '%s%s:%s' % (pre, prefix, local_name)

if name != '/':
if node[N_TYPE] == NT_ATTN:
return f_xpath(node[N_PARENT],
'/%s' % node[N_NAME][:len(node[N_NAME]) - 4])
'/%s' % name[:len(name) - 4])
if node[N_TYPE] == NT_ATTV:
return f_xpath(node[N_PARENT]) # [N_PARENT], '/%s'%node[N_NAME])
return f_xpath(node[N_PARENT]) # [N_PARENT], '/%s'%name)
return f_xpath(node[N_PARENT], '/%s[%d]%s' % (
node[N_NAME], node[N_XNUM], x))
name, node[N_XNUM], x))
elif not x:
return '/'
return x
Expand Down
64 changes: 42 additions & 22 deletions src/xmldiff/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class SaxHandler(ContentHandler):
"""

def __init__(self, normalize_space, include_comment):
self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0]]
self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0, None]]
self._norm_sp = normalize_space or None
self._incl_comm = include_comment or None
self._xpath = ''
Expand All @@ -70,40 +70,59 @@ def endPrefixMapping(self, prefix):

def _buildTag(self, ns_name_tuple):
ns_uri, local_name = ns_name_tuple

if ns_uri and ns_uri != self._default_ns:
ns_name = [x[0] for x in self._ns_mapping.items()
if ns_uri in x[1]][0]
return "%s:%s" % (ns_name, local_name)

if ns_uri:
el_tag = "{%s}%s" % ns_name_tuple
else:
el_tag = local_name
return el_tag

def _getPrefix(self, ns_uri):
if not ns_uri:
return None
for (prefix, uri) in self._ns_mapping.items():
if ns_uri in uri:
return prefix
if ns_uri == 'http://www.w3.org/XML/1998/namespace':
# It's the xml: namespace, undeclared.
return 'xml'
raise ValueError("No prefix found for namespace URI %s" % ns_uri)

# Don't know if I need this
def _buildXPath(self, ns_name_tuple):
ns_uri, local_name = ns_name_tuple
if ns_uri:
prefix = self._getPrefix(ns_uri)
return '%s:%s' % (prefix, local_name)
return local_name

## method of the ContentHandler interface #################################
def startElementNS(self, name, qname, attributes):
if attributes:
attributes = dict(
[(self._buildTag(k), v) for k, v in attributes.items()])
self.startElement(self._buildTag(name), attributes)

def startElement(self, name, attrs):
self.startElementNS((None, name), None, attrs)

def startElementNS(self, name, qname, attrs):
tagName = self._buildTag(name)
prefix = self._getPrefix(name[0])

# process xpath
self._xpath = "%s%s%s" % (self._xpath, '/', name)
_inc_xpath(self._h, self._xpath)
# nodes construction for element
node = [NT_NODE, name, name, [], None, self._n_elmt + 1,
self._h[self._xpath]]
node = [NT_NODE, tagName, tagName, [], None, self._n_elmt + 1,
self._h[self._xpath], prefix]
self._n_elmt += 1
self._xpath = "%s%s%s%s" % (
self._xpath, '[', self._h[self._xpath], ']')
# nodes construction for element's attributes
# sort attributes to avoid further moves
for key in sorted(attrs.keys()):
for key, value in sorted(attrs.items()):
self._n_elmt += 2
attr_node = [NT_ATTN, '@%sName' % key, key, [], None, 1, 0]
attrName = self._buildTag(key)
prefix = self._getPrefix(key[0])
attr_node = [NT_ATTN, '@%sName' % attrName, attrName, [], None,
1, 0, prefix]
link_node(node, attr_node)
link_node(attr_node, [NT_ATTV, '@%s' % key,
attrs.get(key, ''),
[], None, 0, 0])
link_node(attr_node, [NT_ATTV, '@%s' % attrName, value,
[], None, 0, 0, prefix])

link_node(self._p_stack[-1], node)
# set current element on the top of the father stack
Expand Down Expand Up @@ -138,7 +157,8 @@ def characters(self, ch):
xpath = '%s/text()' % self._xpath
_inc_xpath(self._h, xpath)
# nodes construction for text
node = [NT_TEXT, 'text()', ch, [], None, 0, self._h[xpath]]
node = [NT_TEXT, 'text()', ch, [], None, 0,
self._h[xpath], None]
link_node(parent, node)

## method of the LexicalHandler interface #################################
Expand All @@ -153,7 +173,7 @@ def comment(self, content):
_inc_xpath(self._h, xpath)
# nodes construction for comment
node = [NT_COMM, 'comment()', content, [], None,
0, self._h[xpath]]
0, self._h[xpath], None]
link_node(self._p_stack[-1], node)

# methods from xml.sax.saxlib.LexicalHandler (avoid dependency on pyxml)
Expand Down
111 changes: 64 additions & 47 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from xmldiff.input import tree_from_stream
from xmldiff.input import tree_from_lxml

from xmldiff.objects import N_VALUE, N_CHILDS, N_PARENT
from xmldiff.objects import N_VALUE, N_CHILDS, N_PARENT, N_NSPREFIX


HERE = os.path.dirname(__file__)
Expand Down Expand Up @@ -63,8 +63,8 @@ def test_tree_from_stream_simple():
[[1,
u'a',
u'a',
[[1, u'b', u'b', [], mock.ANY, 0, 1],
[1, u'c', u'c', [], mock.ANY, 0, 1],
[[1, u'b', u'b', [], mock.ANY, 0, 1, None],
[1, u'c', u'c', [], mock.ANY, 0, 1, None],
[1,
u'd',
u'd',
Expand All @@ -77,26 +77,32 @@ def test_tree_from_stream_simple():
[],
mock.ANY,
0,
1]],
1,
None]],
mock.ANY,
1,
1],
1,
None],
[1,
u'f',
u'f',
[],
mock.ANY,
0,
1]],
1,
None]],
mock.ANY,
3,
1]],
1,
None]],
mock.ANY,
6,
1]],
1,
None]],
None,
7,
0]
0,
None]
assert tree == expected


Expand Down Expand Up @@ -150,74 +156,85 @@ def test_tree_from_stream_with_namespace():
'/',
'',
[[1,
u'sec:section',
u'sec:section',
u'{urn:corp:sec}section',
u'{urn:corp:sec}section',
[[1,
u'sec:sectionInfo',
u'sec:sectionInfo',
u'{urn:corp:sec}sectionInfo',
u'{urn:corp:sec}sectionInfo',
[[1,
u'sec:secID',
u'sec:secID',
[[4, 'text()', u'S001', [], None, 0, 1]],
u'{urn:corp:sec}secID',
u'{urn:corp:sec}secID',
[[4, 'text()', u'S001', [], None, 0, 1, None]],
None,
1,
1],
1,
'sec'],
[1,
u'sec:name',
u'sec:name',
[[4, 'text()', u'Sales', [], None, 0, 1]],
u'{urn:corp:sec}name',
u'{urn:corp:sec}name',
[[4, 'text()', u'Sales', [], None, 0, 1, None]],
None,
1,
1]],
1,
'sec']],
None,
4,
1],
1,
'sec'],
[1,
u'sec:sectionInfo',
u'sec:sectionInfo',
u'{urn:corp:sec}sectionInfo',
u'{urn:corp:sec}sectionInfo',
[[2,
u'@nameName',
u'name',
[[3, u'@name', u'Development', [], None, 0, 0]],
[[3, u'@name', u'Development', [], None, 0, 0, None]],
None,
1,
0],
0,
None],
[2,
u'@secIDName',
u'secID',
[[3, u'@secID', u'S002', [], None, 0, 0]],
[[3, u'@secID', u'S002', [], None, 0, 0, None]],
None,
1,
0]],
0,
None]],
None,
4,
2],
2,
'sec'],
[1,
u'sec:sectionInfo',
u'sec:sectionInfo',
u'{urn:corp:sec}sectionInfo',
u'{urn:corp:sec}sectionInfo',
[[2,
u'@sec:nameName',
u'sec:name',
[[3, u'@sec:name', u'Gardening', [], None, 0, 0]],
u'@{urn:corp:sec}nameName',
u'{urn:corp:sec}name',
[[3, u'@{urn:corp:sec}name', u'Gardening', [], None, 0, 0, 'sec']],
None,
1,
0],
0,
'sec'],
[2,
u'@sec:secIDName',
u'sec:secID',
[[3, u'@sec:secID', u'S003', [], None, 0, 0]],
u'@{urn:corp:sec}secIDName',
u'{urn:corp:sec}secID',
[[3, u'@{urn:corp:sec}secID', u'S003', [], None, 0, 0, 'sec']],
None,
1,
0]],
0,
'sec']],
None,
4,
3]],
3,
'sec']],
None,
15,
1]],
1,
'sec']],
None,
16,
0]
0,
None]

assert tree == expected

Expand Down Expand Up @@ -246,8 +263,8 @@ def test_tree_from_lxml():
# This is only to fix this test, using xmldiff with these versions of
# lxml will still work, but the prefixes will be wrong.
def fix_lxml_421_tree(t, prefix):
t[1] = t[1].replace('ns00:', prefix)
t[2] = t[2].replace('ns00:', prefix)
if t[N_NSPREFIX] == 'ns00':
t[N_NSPREFIX] = prefix
for subtree in t[3]:
fix_lxml_421_tree(subtree, prefix)

Expand All @@ -264,7 +281,7 @@ def test_tree_from_lxml_with_namespace():
_nuke_parent(tree_stream)

# lxml <= 4.2.1
fix_lxml_421_tree(tree, 'sec:')
fix_lxml_421_tree(tree, 'sec')

assert tree == tree_stream

Expand All @@ -279,7 +296,7 @@ def test_tree_from_lxml_with_namespace():
_nuke_parent(tree_stream)

# lxml <= 4.2.1
fix_lxml_421_tree(tree, 'z:')
fix_lxml_421_tree(tree, 'z')

assert tree == tree_stream

Expand All @@ -295,7 +312,7 @@ def test_tree_from_lxml_with_default_namespace():
_nuke_parent(tree)
_nuke_parent(tree_stream)

fix_lxml_421_tree(tree, '')
fix_lxml_421_tree(tree, None)

assert tree == tree_stream

Expand Down
Loading

0 comments on commit 3b82974

Please sign in to comment.