Skip to content

Commit

Permalink
[svn r3819] r4487@delle: sbehnel | 2008-06-06 13:56:34 +0200
Browse files Browse the repository at this point in the history
 Always set 'UTF-8' as the document encoding if none was provided; this keeps the serialiser from escaping non-ASCII attribute values.

--HG--
branch : trunk
  • Loading branch information
scoder committed Jun 6, 2008
1 parent 8dbad3a commit 5ceb85b
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Expand Up @@ -57,6 +57,9 @@ Bugs fixed
Other changes
-------------

* Non-ASCII characters in attribute values are no longer escaped on
serialisation.

* Passing non-ASCII byte strings or invalid unicode strings as .tag,
namespaces, etc. will result in a ValueError instead of an
AssertionError (just like the tag well-formedness check).
Expand Down
9 changes: 7 additions & 2 deletions src/lxml/parser.pxi
Expand Up @@ -588,8 +588,11 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
_raiseParseError(c_ctxt, filename, context._error_log)
else:
_raiseParseError(c_ctxt, filename, None)
elif result.URL is NULL and filename is not None:
result.URL = tree.xmlStrdup(_cstr(filename))
else:
if result.URL is NULL and filename is not None:
result.URL = tree.xmlStrdup(_cstr(filename))
if result.encoding is NULL:
result.encoding = tree.xmlStrdup("UTF-8")
return result

cdef int _fixHtmlDictNames(tree.xmlDict* c_dict, xmlDoc* c_doc) nogil:
Expand Down Expand Up @@ -1366,6 +1369,8 @@ cdef xmlDoc* _newXMLDoc() except NULL:
result = tree.xmlNewDoc(NULL)
if result is NULL:
python.PyErr_NoMemory()
if result.encoding is NULL:
result.encoding = tree.xmlStrdup("UTF-8")
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
return result

Expand Down
2 changes: 1 addition & 1 deletion src/lxml/tests/test_etree.py
Expand Up @@ -1931,7 +1931,7 @@ def test_docinfo_empty(self):
xml = _bytes('<html><body></body></html>')
tree = etree.parse(BytesIO(xml))
docinfo = tree.docinfo
self.assertEquals(docinfo.encoding, None)
self.assertEquals(docinfo.encoding, "UTF-8")
self.assertEquals(docinfo.xml_version, "1.0")
self.assertEquals(docinfo.public_id, None)
self.assertEquals(docinfo.system_url, None)
Expand Down

0 comments on commit 5ceb85b

Please sign in to comment.