diff --git a/planet/sift.rb b/planet/sift.rb
index 1012d0a..81b54f2 100644
--- a/planet/sift.rb
+++ b/planet/sift.rb
@@ -112,15 +112,9 @@ def Planet.make_absolute node, attr_name
   include HTML5::HTMLSanitizeModule
   @sanitizer = HTML5::HTMLSanitizer.new ''
   def Planet.sanitize node, fido
-    # cull empty formatting elements. They can cause FF & Konq to nest badly.
-    # For instance, causes everything after it to italicized, including other entries.
+    # ensure that non-void elements don't use XML's empty element syntax
     if node.elements.size == 0 && node.text == nil
-      if %w{abbr acronym b big cite code del dfn em i ins kbd s
-            samp small strike strong sub sup tt u var}.include? node.name
-        # If the node has no children and no text, it can only cause trouble.
-        node.remove
-        return
-      end
+      node.text = '' unless HTML5::VOID_ELEMENTS.include? node.name
     end
 
     node.elements.each {|child| sanitize child, fido}
diff --git a/test/sift.rb b/test/sift.rb
new file mode 100644
index 0000000..4960dc6
--- /dev/null
+++ b/test/sift.rb
@@ -0,0 +1,14 @@
+require 'test/unit'
+require 'planet/sift'
+
+class SiftTestCase < Test::Unit::TestCase
+  ATOMNS = 'xmlns="http://www.w3.org/2005/Atom"'
+
+  def test_empty_formatting_elements
+    # http://github.com/bronson/mars/commit/775bc2a397c7812ae67b9979f288c3c835aab059
+    title = "<i/>"
+    doc = Planet::XmlParser.parse(title)
+    Planet.sift doc, nil
+    assert_equal '', doc.elements['title/div'].to_a.join
+  end
+end
diff --git a/test/xmlparser.rb b/test/xmlparser.rb
index df1d26d..6ac79c9 100644
--- a/test/xmlparser.rb
+++ b/test/xmlparser.rb
@@ -15,4 +15,10 @@ def test_122
     doc = Planet::XmlParser.parse('')
     assert_nil doc.to_s.index('&amp;')
   end
+
+  def test_bozo
+    # http://github.com/bronson/mars/commit/567e2f3f459d446f0530bbd4c8acb00dde378420
+    doc = Planet::XmlParser.parse('')
+    assert doc.bozo
+  end
 end