[svn r3046] use default prefixes for common namespaces (following ET …

…1.3) --HG-- branch : trunk
SimonSapin · Nov 5, 2007 · 39bb968 · 39bb968
1 parent 8d4ad07
commit 39bb968
Show file tree

Hide file tree

Showing 4 changed files with 27 additions and 4 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -8,6 +8,8 @@ Under development
 Features added
 --------------
 
+* Use default prefixes for some common XML namespaces
+
 * ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and
   two overridable methods: ``allow_embedded_url(el, url)`` and the
   more general ``allow_element(el)``.

diff --git a/doc/objectify.txt b/doc/objectify.txt
@@ -956,12 +956,15 @@ namespace information if you choose to do so::
     >>> for prefix, namespace in el.nsmap.items():
     ...     print prefix, '-', namespace
     ns0 - http://codespeak.net/lxml/objectify/pytype
-    ns1 - http://www.w3.org/2001/XMLSchema-instance
     foo - http://www.w3.org/2001/XMLSchema
+    xsi - http://www.w3.org/2001/XMLSchema-instance
 
     >>> print el.get("{http://www.w3.org/2001/XMLSchema-instance}type")
     foo:string
 
+Note how lxml chose a default prefix for the XML Schema Instance
+namespace.  We can override it as in the following example::
+
     >>> el = objectify.DataElement('5', _xsi='foo:string',
     ...          nsmap={'foo': 'http://www.w3.org/2001/XMLSchema',
     ...                 'myxsi': 'http://www.w3.org/2001/XMLSchema-instance'})

diff --git a/doc/tutorial.txt b/doc/tutorial.txt
@@ -658,9 +658,9 @@ the real namespaces instead::
     >>> body.text = "Hello World"
 
     >>> print etree.tostring(xhtml, pretty_print=True)
-    <ns0:html xmlns:ns0="http://www.w3.org/1999/xhtml">
-      <ns0:body>Hello World</ns0:body>
-    </ns0:html>
+    <html:html xmlns:html="http://www.w3.org/1999/xhtml">
+      <html:body>Hello World</html:body>
+    </html:html>
 
 .. _`namespace prefixes`: http://www.w3.org/TR/xml-names/#ns-qualnames
 

diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx
@@ -68,6 +68,17 @@ if _FILENAME_ENCODING is None:
 cdef char* _C_FILENAME_ENCODING
 _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING)
 
+# set up some default namespace prefixes
+_DEFAULT_NAMESPACE_PREFIXES = {
+    "http://www.w3.org/1999/xhtml": "html",
+    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
+    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
+    # xml schema
+    "http://www.w3.org/2001/XMLSchema": "xs",
+    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
+    # Dublin Core
+    "http://purl.org/dc/elements/1.1/": "dc",
+}
 
 # Error superclass for ElementTree compatibility
 class Error(Exception):
@@ -323,6 +334,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
         """
         cdef xmlNs* c_ns
         cdef xmlNs* c_doc_ns
+        cdef python.PyObject* dict_result
         if c_node.type != tree.XML_ELEMENT_NODE:
             assert c_node.type == tree.XML_ELEMENT_NODE, \
                 "invalid node type %d, expected %d" % (
@@ -332,6 +344,12 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
         if c_ns is not NULL:
             return c_ns
 
+        if c_prefix is NULL:
+            dict_result = python.PyDict_GetItemString(
+                _DEFAULT_NAMESPACE_PREFIXES, c_href)
+            if dict_result is not NULL:
+                c_prefix = _cstr(<object>dict_result)
+
         if c_prefix is NULL or \
                tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
             # try to simulate ElementTree's namespace prefix creation