Skip to content

Commit

Permalink
expect more errors like bug 568317
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeff Balogh committed Aug 23, 2010
1 parent dcf900f commit d4bdcba
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
20 changes: 18 additions & 2 deletions bleach/__init__.py
@@ -1,10 +1,13 @@
import logging
import re

import html5lib
from html5lib.serializer.htmlserializer import HTMLSerializer
from sanitizer import BleachSanitizer
from encoding import force_unicode

log = logging.getLogger('bleach')

ALLOWED_TAGS = [
'a',
'abbr',
Expand Down Expand Up @@ -72,7 +75,7 @@ class s(BleachSanitizer):

parser = html5lib.HTMLParser(tokenizer=s)

return force_unicode(_serialize(parser.parseFragment(string))).strip()
return render(parser.parseFragment(string), string).strip()

def linkify(self, text, nofollow=True):
"""Convert URL-like strings in an HTML fragment to links.
Expand Down Expand Up @@ -141,7 +144,7 @@ def link_repl(match):

linkify_nodes(forest)

return force_unicode(_serialize(forest))
return render(forest, text)

def filter_url(self, url):
"""Applied to the href attribute of an autolinked URL"""
Expand All @@ -152,6 +155,19 @@ def filter_text(self, url):
return url


def render(tree, source):
    """Try rendering as HTML, then XML, then give up.

    The tree is first serialized with ``_serialize``. If that raises, the
    error is logged together with ``source`` and ``tree.to_xml()`` is tried
    instead. If the fallback raises too, that error is logged as well and an
    empty unicode string is returned, so no ``Exception`` raised while
    rendering escapes this function.

    :param tree: parsed tree to render; passed to ``_serialize`` and, for
        the fallback, asked for ``to_xml()``.
    :param source: the original input the tree came from; used only in the
        log messages so failures can be traced back to their input.
    :returns: the rendered markup passed through ``force_unicode``, or
        ``u''`` when both renderers fail.
    """
    try:
        return force_unicode(_serialize(tree))
    except Exception as html_error:
        # Deliberate best-effort: record the failure and fall through to the
        # XML renderer instead of propagating.
        log.error('HTML: %r ::: %r', html_error, source)

    try:
        return force_unicode(tree.to_xml())
    except Exception as xml_error:
        # Both renderers failed; log and return an empty result.
        log.error('XML: %r ::: %r', xml_error, source)
        return u''


def _serialize(domtree):
walker = html5lib.treewalkers.getTreeWalker('simpletree')
stream = walker(domtree)
Expand Down
9 changes: 8 additions & 1 deletion bleach/tests/test_basics.py
@@ -1,6 +1,8 @@
import html5lib

from nose.tools import eq_

from bleach import Bleach
from bleach import Bleach, render

b = Bleach()

Expand Down Expand Up @@ -84,3 +86,8 @@ def test_serializer():
def test_no_href_links():
    """An anchor that has no href must come back from linkify unchanged."""
    markup = u'<a name="anchor">x</a>'
    linkified = b.linkify(markup, nofollow=False)
    eq_(markup, linkified)


def test_xml_render():
    """Rendering an empty parsed fragment yields an empty string."""
    empty_fragment = html5lib.HTMLParser().parseFragment('')
    rendered = render(empty_fragment, 'src')
    eq_(rendered, '')

0 comments on commit d4bdcba

Please sign in to comment.