Navigation Menu

Skip to content

Commit

Permalink
allowing #text to return unescaped html entities if the user very car…
Browse files Browse the repository at this point in the history
…efully asks for it.
  • Loading branch information
flavorjones committed Mar 9, 2010
1 parent b2c0f1f commit 244f96f
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 8 deletions.
24 changes: 20 additions & 4 deletions lib/loofah/html/document.rb
Expand Up @@ -19,8 +19,24 @@ class Document < Nokogiri::HTML::Document
# Loofah.document("<h1>Title</h1><div>Content</div>").text
# # => "TitleContent"
#
def text
encode_special_chars xpath("/html/body").inner_text
# By default, the returned text will have HTML entities
# escaped. If you want unescaped entities, and you understand
# that the result is unsafe to render in a browser, then you
# can pass an argument as shown:
#
# frag = Loofah.fragment("&lt;script&gt;alert('EVIL');&lt;/script&gt;")
# # ok for browser:
# frag.text # => "&lt;script&gt;alert('EVIL');&lt;/script&gt;"
# # decidedly not ok for browser:
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
#
def text(options={})
  # Gather the inner text of everything under the document's <body>.
  body_text = xpath("/html/body").inner_text

  # Only an explicit +false+ opts out of escaping; a missing, nil or
  # truthy option keeps the escaped default.
  return body_text if options[:encode_special_chars] == false # possibly dangerous if rendered in a browser

  encode_special_chars body_text
end
alias :inner_text :text
alias :to_str :text
Expand All @@ -35,8 +51,8 @@ def text
# Loofah.document("<h1>Title</h1><div>Content</div>").to_text
# # => "\nTitle\n\nContent\n"
#
def to_text
Loofah::Helpers.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text
def to_text(options={})
  # Run the :newline_block_elements scrubber on a duplicate rather than on self.
  scrubbed_copy = self.dup.scrub!(:newline_block_elements)

  # +options+ is passed straight through to #text (e.g. :encode_special_chars).
  Loofah::Helpers.remove_extraneous_whitespace scrubbed_copy.text(options)
end
end
end
Expand Down
24 changes: 20 additions & 4 deletions lib/loofah/html/document_fragment.rb
Expand Up @@ -35,8 +35,24 @@ def to_s
# Loofah.fragment("<h1>Title</h1><div>Content</div>").text
# # => "TitleContent"
#
def text
encode_special_chars serialize_roots.children.inner_text
# By default, the returned text will have HTML entities
# escaped. If you want unescaped entities, and you understand
# that the result is unsafe to render in a browser, then you
# can pass an argument as shown:
#
# frag = Loofah.fragment("&lt;script&gt;alert('EVIL');&lt;/script&gt;")
# # ok for browser:
# frag.text # => "&lt;script&gt;alert('EVIL');&lt;/script&gt;"
# # decidedly not ok for browser:
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
#
def text(options={})
  # Gather the inner text of the children of the serialized roots.
  fragment_text = serialize_roots.children.inner_text

  # Only an explicit +false+ opts out of escaping; a missing, nil or
  # truthy option keeps the escaped default.
  return fragment_text if options[:encode_special_chars] == false # possibly dangerous if rendered in a browser

  encode_special_chars fragment_text
end
alias :inner_text :text
alias :to_str :text
Expand All @@ -51,8 +67,8 @@ def text
# Loofah.fragment("<h1>Title</h1><div>Content</div>").to_text
# # => "\nTitle\n\nContent\n"
#
def to_text
Loofah::Helpers.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text
def to_text(options={})
  # Run the :newline_block_elements scrubber on a duplicate rather than on self.
  scrubbed_copy = self.dup.scrub!(:newline_block_elements)

  # +options+ is passed straight through to #text (e.g. :encode_special_chars).
  Loofah::Helpers.remove_extraneous_whitespace scrubbed_copy.text(options)
end

private
Expand Down
37 changes: 37 additions & 0 deletions test/integration/test_scrubbers.rb
Expand Up @@ -18,6 +18,7 @@ class TestScrubbers < Test::Unit::TestCase

# Fixture markup: nested divs whose second child holds an entity-escaped <script> tag.
ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
# Expected #text output when escaping is left on: the entities stay escaped.
ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
# Expected #text output with :encode_special_chars => false: the entities come back unescaped.
ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"

context "Document" do
context "#scrub!" do
Expand Down Expand Up @@ -89,6 +90,24 @@ class TestScrubbers < Test::Unit::TestCase

assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
end

context "with encode_special_chars => false" do
  should "leave behind only inner text with html entities unescaped" do
    # Embed the fixture in a complete HTML document.
    document = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"

    # Explicitly opting out of escaping should yield the unescaped text.
    assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, document.text(:encode_special_chars => false)
  end
end

context "with encode_special_chars => true" do
  should "leave behind only inner text with html entities still escaped" do
    # Embed the fixture in a complete HTML document.
    document = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"

    # Passing true explicitly should match the escaped default.
    assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, document.text(:encode_special_chars => true)
  end
end
end

context "#to_s" do
Expand Down Expand Up @@ -239,6 +258,24 @@ class TestScrubbers < Test::Unit::TestCase

assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
end

context "with encode_special_chars => false" do
  should "leave behind only inner text with html entities unescaped" do
    # Parse the fixture, wrapped in a div, as a fragment.
    fragment = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"

    # Explicitly opting out of escaping should yield the unescaped text.
    assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, fragment.text(:encode_special_chars => false)
  end
end

context "with encode_special_chars => true" do
  should "leave behind only inner text with html entities still escaped" do
    # Parse the fixture, wrapped in a div, as a fragment.
    fragment = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"

    # Passing true explicitly should match the escaped default.
    assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, fragment.text(:encode_special_chars => true)
  end
end
end

context "#to_s" do
Expand Down

0 comments on commit 244f96f

Please sign in to comment.