Skip to content

Commit

Permalink
[svn r3146] r3205@delle: sbehnel | 2008-01-04 19:21:48 +0100
Browse files Browse the repository at this point in the history
 check entity/character references in Entity() factory

--HG--
branch : trunk
  • Loading branch information
scoder committed Jan 4, 2008
1 parent 53ce2c5 commit bf90e35
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 12 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Under development
Features added
--------------

* Invalid entity names and character references will now be rejected
by the ``Entity()`` factory.

* ``entity.text`` now returns the textual representation of the
entity, e.g. ``&amp;``.

Expand Down
31 changes: 25 additions & 6 deletions src/lxml/apihelpers.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1043,22 +1043,41 @@ cdef int _htmlNameIsValid(char* c_name):
c_name = c_name + 1
return 1

cdef bint _characterReferenceIsValid(char* c_name):
    # Validate the body of a character reference, i.e. the text that
    # follows the leading '#'.  Accepted forms are one or more decimal
    # digits ("17683") or a lowercase 'x' followed by one or more
    # hexadecimal digits in either case ("x98AF").
    #
    # Returns 1 if the reference is well-formed, 0 otherwise.
    cdef bint hex_allowed
    cdef char c_ch
    hex_allowed = (c_name[0] == c'x')
    if hex_allowed:
        # step over the 'x' marker; only digits may follow
        c_name += 1
    if c_name[0] == c'\0':
        # at least one digit is required
        return 0
    while c_name[0] != c'\0':
        c_ch = c_name[0]
        c_name += 1
        if c_ch >= c'0' and c_ch <= c'9':
            continue
        if not hex_allowed:
            # decimal references may contain digits only
            return 0
        if c_ch >= c'a' and c_ch <= c'f':
            continue
        if c_ch >= c'A' and c_ch <= c'F':
            continue
        return 0
    return 1

cdef int _tagValidOrRaise(tag_utf) except -1:
if not _pyXmlNameIsValid(tag_utf):
raise ValueError, "Invalid tag name %r" % \
python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict')
raise ValueError("Invalid tag name %r" % \
python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict'))
return 0

cdef int _htmlTagValidOrRaise(tag_utf) except -1:
if not _pyHtmlNameIsValid(tag_utf):
raise ValueError, "Invalid HTML tag name %r" % \
python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict')
raise ValueError("Invalid HTML tag name %r" % \
python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict'))
return 0

cdef int _attributeValidOrRaise(name_utf) except -1:
if not _pyXmlNameIsValid(name_utf):
raise ValueError, "Invalid attribute name %r" % \
python.PyUnicode_FromEncodedObject(name_utf, 'UTF-8', 'strict')
raise ValueError("Invalid attribute name %r" % \
python.PyUnicode_FromEncodedObject(name_utf, 'UTF-8', 'strict'))
return 0

cdef object _namespacedName(xmlNode* c_node):
Expand Down
20 changes: 14 additions & 6 deletions src/lxml/lxml.etree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2110,18 +2110,26 @@ def ProcessingInstruction(target, text=None):
PI = ProcessingInstruction

def Entity(name):
"""Entity factory. This factory function creates a special element that
will be serialized as an XML entity. Note, however, that the entity will
not be automatically declared in the document. A document that uses
entities requires a DTD.
"""Entity factory. This factory function creates a special element
that will be serialized as an XML entity reference or character
reference. Note, however, that entities will not be automatically
declared in the document. A document that uses entity references
requires a DTD to define the entities.
"""
cdef _Document doc
cdef xmlNode* c_node
cdef xmlDoc* c_doc
name = _utf8(name)
cdef char* c_name
name_utf = _utf8(name)
c_name = _cstr(name_utf)
if c_name[0] == c'#':
if not _characterReferenceIsValid(c_name + 1):
raise ValueError("Invalid character reference: '%s'" % name)
elif not _xmlNameIsValid(c_name):
raise ValueError("Invalid entity reference: '%s'" % name)
c_doc = _newDoc()
doc = _documentFactory(c_doc, None)
c_node = _createEntity(c_doc, _cstr(name))
c_node = _createEntity(c_doc, c_name)
tree.xmlAddChild(<xmlNode*>c_doc, c_node)
return _elementFactory(doc, c_node)

Expand Down
15 changes: 15 additions & 0 deletions src/lxml/tests/test_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,21 @@ def test_entity_append(self):
self.assertEquals('<root>&test;</root>',
tostring(root))

def test_entity_values(self):
    """Check the ``.text`` of named, decimal and hex references."""
    make_entity = self.etree.Entity
    # (name passed to Entity(), expected .text)
    expectations = [
        ("test", '&test;'),
        ("#17683", '&#17683;'),
        ("#x1768", '&#x1768;'),
        ("#x98AF", '&#x98AF;'),
    ]
    for entity_name, expected_text in expectations:
        self.assertEquals(make_entity(entity_name).text, expected_text)

def test_entity_error(self):
    """Check that malformed entity names make ``Entity()`` raise."""
    make_entity = self.etree.Entity
    # (expected exception type, name passed to Entity()); the name with
    # an embedded NUL byte is expected to raise AssertionError rather
    # than ValueError.
    rejected = [
        (ValueError, 'a b c'),
        (ValueError, 'a,b'),
        (AssertionError, 'a\0b'),
        (ValueError, '#abc'),
        (ValueError, '#xxyz'),
    ]
    for error_type, bad_name in rejected:
        self.assertRaises(error_type, make_entity, bad_name)

# TypeError in etree, AssertionError in ElementTree;
def test_setitem_assert(self):
Element = self.etree.Element
Expand Down

0 comments on commit bf90e35

Please sign in to comment.