Permalink
Browse files

Resolve conflict from upstream

  • Loading branch information...
2 parents ff9b678 + 7295b3f commit 39381b4c9fc461e8d7f77e0db78530490e506b69 @jrust jrust committed Apr 9, 2012
Showing with 16 additions and 3 deletions.
  1. +2 −1 README.markdown
  2. +3 −0 lib/readability.rb
  3. +2 −2 ruby-readability.gemspec
  4. +9 −0 spec/readability_spec.rb
View
3 README.markdown
@@ -30,7 +30,8 @@ You may provide options to Readability::Document.new, including:
:attributes - whitelist of allowed attributes
:debug - provide debugging output, defaults false
:encoding - if the page is of a known encoding, you can specify it; if left unspecified,
- the encoding will be guessed (only in Ruby 1.9.x)
+ the encoding will be guessed (only in Ruby 1.9.x). If you wish to disable guessing,
+ supply :do_not_guess_encoding => true.
:html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem
to aid with guessing the HTML encoding
:ignore_image_format - for use with .images. For example: :ignore_image_format => ["gif", "png"]
View
3 lib/readability.rb
@@ -48,6 +48,9 @@ def make_html
@html = Nokogiri::HTML(@input, nil, @options[:encoding])
# In case Nokogiri returns an empty document which can happen, for example, if @input is an empty string
@html = Nokogiri::HTML('<body />', nil, @options[:encoding]) if @html.children.length == 1
+
+ # Remove html comment tags
+ @html.xpath('//comment()').each { |i| i.remove }
end
def images(content=nil, reload=false)
View
4 ruby-readability.gemspec
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
Gem::Specification.new do |s|
s.name = "ruby-readability"
- s.version = '0.5.1'
+ s.version = '0.5.2'
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
s.email = ["andrew@iterationlabs.com"]
s.homepage = "http://github.com/iterationlabs/ruby-readability"
@@ -21,5 +21,5 @@ Gem::Specification.new do |s|
s.add_development_dependency "rspec-expectations", ">= 2.8"
s.add_development_dependency "rr", ">= 1.0"
s.add_dependency 'nokogiri', '>= 1.4.2'
- s.add_dependency 'guess_html_encoding', '>= 0.0.2'
+ s.add_dependency 'guess_html_encoding', '>= 0.0.4'
end
View
9 spec/readability_spec.rb
@@ -347,4 +347,13 @@
end
end
end
+
+ describe "strip html comments" do
+ it "should strip the html comments tag" do
+ doc = Readability::Document.new("<html><head><meta http-equiv='content-type' content='text/html; charset=LATIN1'></head><body><div>hi!<!-- bye~ --></div></body></html>")
+ content = doc.content
+ content.should include("hi!")
+ content.should_not include("bye")
+ end
+ end
end

0 comments on commit 39381b4

Please sign in to comment.