From 3b8297494eee3db2a628fef544fb2db0255fc58e Mon Sep 17 00:00:00 2001
From: Lennart Regebro <regebro@gmail.com>
Date: Mon, 25 Jun 2018 18:11:59 +0200
Subject: [PATCH] Once again fix the namespace handling

The original fix changed too much: it also rewrote the tag names into xpath
syntax. This fix keeps the tag names unchanged, but changes the return format
of the parser by adding a field for the namespace prefix.
---
 CHANGES.rst            |   6 ++-
 src/xmldiff/fmes.py    |   2 +-
 src/xmldiff/objects.py |  23 +++++++--
 src/xmldiff/parser.py  |  64 ++++++++++++++++--------
 tests/test_parser.py   | 111 ++++++++++++++++++++++++-----------------
 tests/test_regrtest.py |  11 ++--
 6 files changed, 135 insertions(+), 82 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 6c27a8e..266a2f4 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,11 @@ CHANGES
 1.1.2 (unreleased)
 ------------------
 
-- Nothing changed yet.
+- When I fixed the xpath namespace handling I also changed the tag names to
+  an xpath syntax. This was unhelpful, so I changed that back. To solve this
+  I have had to extend the return format from the parser and add an N_NSPREFIX
+  that contains the prefix. This is used by the differ to return correct
+  xpaths without changing the tags.
 
 
 1.1.1 (2018-06-20)
diff --git a/src/xmldiff/fmes.py b/src/xmldiff/fmes.py
index f7965e9..ec21cd7 100644
--- a/src/xmldiff/fmes.py
+++ b/src/xmldiff/fmes.py
@@ -384,7 +384,7 @@ def _before_attribute(self, parent_node, attr_node, new_name=None):
         return attr_name
 
     FAKE_TAG = [NT_NODE, 'LogilabXMLDIFFFAKETag', 'LogilabXMLDIFFFAKETag',
-                [], None, 0, 0, True, False]
+                [], None, 0, 0, None, True, False]
 
     def _before_insert_text(self, parent, new_text, k):
         """ check if a text node that will be remove has two sibbling text
diff --git a/src/xmldiff/objects.py b/src/xmldiff/objects.py
index 08926dd..35384e2 100644
--- a/src/xmldiff/objects.py
+++ b/src/xmldiff/objects.py
@@ -41,7 +41,8 @@
 N_PARENT = 4  # node's parent
 N_ISSUE = 5  # node's total issue number
 N_XNUM = 6  # to compute node's xpath
-NSIZE = 7  # number of items in a list which represent a node
+N_NSPREFIX = 7  # node's namespace prefix (if any)
+NSIZE = 8  # number of items in a list which represent a node
 
 # NODE TYPES
 # NT_SYST = 0 # SYSTEM node (added by parser) /!\ deprecated
@@ -120,14 +121,26 @@ def caract(node):
 
 def f_xpath(node, x=''):
     """ compute node's xpath """
-    if node[N_NAME] != '/':
+    name = node[N_NAME]
+    if '{' in name:
+        # We have a namespace
+        pre, rest = name.split('{', 1)
+        uri, local_name = rest.split('}', 1)
+        prefix = node[N_NSPREFIX]
+        if prefix is None:
+            # Default namespace
+            name = pre + local_name
+        else:
+            name = '%s%s:%s' % (pre, prefix, local_name)
+
+    if name != '/':
         if node[N_TYPE] == NT_ATTN:
             return f_xpath(node[N_PARENT],
-                           '/%s' % node[N_NAME][:len(node[N_NAME]) - 4])
+                           '/%s' % name[:len(name) - 4])
         if node[N_TYPE] == NT_ATTV:
-            return f_xpath(node[N_PARENT])  # [N_PARENT], '/%s'%node[N_NAME])
+            return f_xpath(node[N_PARENT])  # [N_PARENT], '/%s'%name)
         return f_xpath(node[N_PARENT], '/%s[%d]%s' % (
-            node[N_NAME], node[N_XNUM], x))
+            name, node[N_XNUM], x))
     elif not x:
         return '/'
     return x
diff --git a/src/xmldiff/parser.py b/src/xmldiff/parser.py
index 936e631..835fc90 100644
--- a/src/xmldiff/parser.py
+++ b/src/xmldiff/parser.py
@@ -43,7 +43,7 @@ class SaxHandler(ContentHandler):
     """
 
     def __init__(self, normalize_space, include_comment):
-        self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0]]
+        self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0, None]]
         self._norm_sp = normalize_space or None
         self._incl_comm = include_comment or None
         self._xpath = ''
@@ -70,40 +70,59 @@ def endPrefixMapping(self, prefix):
 
     def _buildTag(self, ns_name_tuple):
         ns_uri, local_name = ns_name_tuple
-
-        if ns_uri and ns_uri != self._default_ns:
-            ns_name = [x[0] for x in self._ns_mapping.items()
-                       if ns_uri in x[1]][0]
-            return "%s:%s" % (ns_name, local_name)
-
+        if ns_uri:
+            el_tag = "{%s}%s" % ns_name_tuple
+        else:
+            el_tag = local_name
+        return el_tag
+
+    def _getPrefix(self, ns_uri):
+        if not ns_uri:
+            return None
+        for (prefix, uri) in self._ns_mapping.items():
+            if ns_uri in uri:
+                return prefix
+        if ns_uri == 'http://www.w3.org/XML/1998/namespace':
+            # It's the xml: namespace, undeclared.
+            return 'xml'
+        raise ValueError("No prefix found for namespace URI %s" % ns_uri)
+
+    # NOTE(review): not sure this helper is needed; remove it if nothing uses it
+    def _buildXPath(self, ns_name_tuple):
+        ns_uri, local_name = ns_name_tuple
+        if ns_uri:
+            prefix = self._getPrefix(ns_uri)
+            return '%s:%s' % (prefix, local_name)
         return local_name
 
     ## method of the ContentHandler interface #################################
-    def startElementNS(self, name, qname, attributes):
-        if attributes:
-            attributes = dict(
-                [(self._buildTag(k), v) for k, v in attributes.items()])
-        self.startElement(self._buildTag(name), attributes)
-
     def startElement(self, name, attrs):
+        self.startElementNS((None, name), None, attrs)
+
+    def startElementNS(self, name, qname, attrs):
+        tagName = self._buildTag(name)
+        prefix = self._getPrefix(name[0])
+
         # process xpath
         self._xpath = "%s%s%s" % (self._xpath, '/', name)
         _inc_xpath(self._h, self._xpath)
         # nodes construction for element
-        node = [NT_NODE, name, name, [], None, self._n_elmt + 1,
-                self._h[self._xpath]]
+        node = [NT_NODE, tagName, tagName, [], None, self._n_elmt + 1,
+                self._h[self._xpath], prefix]
         self._n_elmt += 1
         self._xpath = "%s%s%s%s" % (
             self._xpath, '[', self._h[self._xpath], ']')
         # nodes construction for element's attributes
         # sort attributes to avoid further moves
-        for key in sorted(attrs.keys()):
+        for key, value in sorted(attrs.items()):
             self._n_elmt += 2
-            attr_node = [NT_ATTN, '@%sName' % key, key, [], None, 1, 0]
+            attrName = self._buildTag(key)
+            prefix = self._getPrefix(key[0])
+            attr_node = [NT_ATTN, '@%sName' % attrName, attrName, [], None,
+                         1, 0, prefix]
             link_node(node, attr_node)
-            link_node(attr_node, [NT_ATTV, '@%s' % key,
-                                  attrs.get(key, ''),
-                                  [], None, 0, 0])
+            link_node(attr_node, [NT_ATTV, '@%s' % attrName, value,
+                                  [], None, 0, 0, prefix])
 
         link_node(self._p_stack[-1], node)
         # set current element on the top of the father stack
@@ -138,7 +157,8 @@ def characters(self, ch):
                 xpath = '%s/text()' % self._xpath
                 _inc_xpath(self._h, xpath)
                 # nodes construction for text
-                node = [NT_TEXT, 'text()', ch, [], None, 0, self._h[xpath]]
+                node = [NT_TEXT, 'text()', ch, [], None, 0,
+                        self._h[xpath], None]
                 link_node(parent, node)
 
     ## method of the LexicalHandler interface #################################
@@ -153,7 +173,7 @@ def comment(self, content):
             _inc_xpath(self._h, xpath)
             # nodes construction for comment
             node = [NT_COMM, 'comment()', content, [], None,
-                    0, self._h[xpath]]
+                    0, self._h[xpath], None]
             link_node(self._p_stack[-1], node)
 
     # methods from xml.sax.saxlib.LexicalHandler (avoid dependency on pyxml)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 35cea64..2a28fb7 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -26,7 +26,7 @@
 from xmldiff.input import tree_from_stream
 from xmldiff.input import tree_from_lxml
 
-from xmldiff.objects import N_VALUE, N_CHILDS, N_PARENT
+from xmldiff.objects import N_VALUE, N_CHILDS, N_PARENT, N_NSPREFIX
 
 
 HERE = os.path.dirname(__file__)
@@ -63,8 +63,8 @@ def test_tree_from_stream_simple():
         [[1,
           u'a',
           u'a',
-          [[1, u'b', u'b', [], mock.ANY, 0, 1],
-           [1, u'c', u'c', [], mock.ANY, 0, 1],
+          [[1, u'b', u'b', [], mock.ANY, 0, 1, None],
+           [1, u'c', u'c', [], mock.ANY, 0, 1, None],
            [1,
             u'd',
             u'd',
@@ -77,26 +77,32 @@ def test_tree_from_stream_simple():
                 [],
                 mock.ANY,
                 0,
-                1]],
+                1,
+                None]],
               mock.ANY,
               1,
-              1],
+              1,
+              None],
              [1,
               u'f',
               u'f',
               [],
               mock.ANY,
               0,
-              1]],
+              1,
+              None]],
             mock.ANY,
             3,
-            1]],
+            1,
+            None]],
           mock.ANY,
           6,
-          1]],
+          1,
+          None]],
         None,
         7,
-        0]
+        0,
+        None]
     assert tree == expected
 
 
@@ -150,74 +156,85 @@ def test_tree_from_stream_with_namespace():
         '/',
         '',
         [[1,
-          u'sec:section',
-          u'sec:section',
+          u'{urn:corp:sec}section',
+          u'{urn:corp:sec}section',
           [[1,
-            u'sec:sectionInfo',
-            u'sec:sectionInfo',
+            u'{urn:corp:sec}sectionInfo',
+            u'{urn:corp:sec}sectionInfo',
             [[1,
-              u'sec:secID',
-              u'sec:secID',
-              [[4, 'text()', u'S001', [], None, 0, 1]],
+              u'{urn:corp:sec}secID',
+              u'{urn:corp:sec}secID',
+              [[4, 'text()', u'S001', [], None, 0, 1, None]],
               None,
               1,
-              1],
+              1,
+              'sec'],
              [1,
-              u'sec:name',
-              u'sec:name',
-              [[4, 'text()', u'Sales', [], None, 0, 1]],
+              u'{urn:corp:sec}name',
+              u'{urn:corp:sec}name',
+              [[4, 'text()', u'Sales', [], None, 0, 1, None]],
               None,
               1,
-              1]],
+              1,
+              'sec']],
             None,
             4,
-            1],
+            1,
+            'sec'],
            [1,
-            u'sec:sectionInfo',
-            u'sec:sectionInfo',
+            u'{urn:corp:sec}sectionInfo',
+            u'{urn:corp:sec}sectionInfo',
             [[2,
               u'@nameName',
               u'name',
-              [[3, u'@name', u'Development', [], None, 0, 0]],
+              [[3, u'@name', u'Development', [], None, 0, 0, None]],
               None,
               1,
-              0],
+              0,
+              None],
              [2,
               u'@secIDName',
               u'secID',
-              [[3, u'@secID', u'S002', [], None, 0, 0]],
+              [[3, u'@secID', u'S002', [], None, 0, 0, None]],
               None,
               1,
-              0]],
+              0,
+              None]],
             None,
             4,
-            2],
+            2,
+            'sec'],
            [1,
-            u'sec:sectionInfo',
-            u'sec:sectionInfo',
+            u'{urn:corp:sec}sectionInfo',
+            u'{urn:corp:sec}sectionInfo',
             [[2,
-              u'@sec:nameName',
-              u'sec:name',
-              [[3, u'@sec:name', u'Gardening', [], None, 0, 0]],
+              u'@{urn:corp:sec}nameName',
+              u'{urn:corp:sec}name',
+              [[3, u'@{urn:corp:sec}name', u'Gardening', [], None, 0, 0, 'sec']],
               None,
               1,
-              0],
+              0,
+              'sec'],
              [2,
-              u'@sec:secIDName',
-              u'sec:secID',
-              [[3, u'@sec:secID', u'S003', [], None, 0, 0]],
+              u'@{urn:corp:sec}secIDName',
+              u'{urn:corp:sec}secID',
+              [[3, u'@{urn:corp:sec}secID', u'S003', [], None, 0, 0, 'sec']],
               None,
               1,
-              0]],
+              0,
+              'sec']],
             None,
             4,
-            3]],
+            3,
+            'sec']],
           None,
           15,
-          1]],
+          1,
+          'sec']],
         None,
         16,
-        0]
+        0,
+        None]
 
     assert tree == expected
 
@@ -246,8 +263,8 @@ def test_tree_from_lxml():
 # This is only to fix this test, using xmldiff with these versions of
 # lxml will still work, but the prefixes will be wrong.
 def fix_lxml_421_tree(t, prefix):
-    t[1] = t[1].replace('ns00:', prefix)
-    t[2] = t[2].replace('ns00:', prefix)
+    if t[N_NSPREFIX] == 'ns00':
+        t[N_NSPREFIX] = prefix
     for subtree in t[3]:
         fix_lxml_421_tree(subtree, prefix)
 
@@ -264,7 +281,7 @@ def test_tree_from_lxml_with_namespace():
     _nuke_parent(tree_stream)
 
     # lxml <= 4.2.1
-    fix_lxml_421_tree(tree, 'sec:')
+    fix_lxml_421_tree(tree, 'sec')
 
     assert tree == tree_stream
 
@@ -279,7 +296,7 @@ def test_tree_from_lxml_with_namespace():
     _nuke_parent(tree_stream)
 
     # lxml <= 4.2.1
-    fix_lxml_421_tree(tree, 'z:')
+    fix_lxml_421_tree(tree, 'z')
 
     assert tree == tree_stream
 
@@ -295,7 +312,7 @@ def test_tree_from_lxml_with_default_namespace():
     _nuke_parent(tree)
     _nuke_parent(tree_stream)
 
-    fix_lxml_421_tree(tree, '')
+    fix_lxml_421_tree(tree, None)
 
     assert tree == tree_stream
 
diff --git a/tests/test_regrtest.py b/tests/test_regrtest.py
index ec6f4e4..19fa74b 100644
--- a/tests/test_regrtest.py
+++ b/tests/test_regrtest.py
@@ -34,15 +34,15 @@ def get_output(options):
     backup = sys.stdout
 
     # capture stdout
-    sys.stdout = six.StringIO()
+    sys.stdout = out = six.StringIO()
     try:
         main.run(options)
     except SystemExit:
         pass
     finally:
-        output = sys.stdout.getvalue().strip()
-        sys.stdout.close()
         sys.stdout = backup
+        output = out.getvalue().strip()
+        out.close()
 
     return output
 
@@ -157,9 +157,8 @@ def test_known(fnames, lcs2_type):
     old = fnames['old']
     new = fnames['new']
     res_file = fnames['result']
-    f = open(res_file)
-    expected = f.read().strip()
-    f.close()
+    with open(res_file) as f:
+        expected = f.read().strip()
     options = [old, new]
     data = get_output(options)
     assert data == expected, '%s:\n%r != %r' % (options, data, expected)