queso / dryopteris forked from brynary/dryopteris

HTML sanitization using Nokogiri

This URL has Read+Write access

dryopteris / benchmark.rb
100755 68 lines (55 sloc) 1.338 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env ruby
require 'rubygems'
require 'open-uri'
require 'hpricot'
require File.expand_path(File.dirname(__FILE__) + "/lib/dryopteris")
require 'benchmark'
require "action_view"
require "sanitize"
 
# Plain-Ruby host for ActionView's sanitize helpers, so they can be invoked
# on an instance outside of a view. The helper module supplies the instance
# methods; its ClassMethods module is attached at the class level.
class RailsSanitize
  extend ActionView::Helpers::SanitizeHelper::ClassMethods
  include ActionView::Helpers::SanitizeHelper
end
 
# Thin adapter that exposes html5lib's sanitizing tokenizer through a single
# #sanitize instance method.
class HTML5libSanitize
  # Load the html5lib pieces used below before mixing in the HTML5 namespace.
  %w[
    html5parser
    liberalxmlparser
    treewalkers
    treebuilders
    serializer
    sanitizer
  ].each do |component|
    require "html5/#{component}"
  end

  include HTML5

  # Parses +html+ as a fragment, using HTMLSanitizer as the tokenizer and a
  # REXML tree builder, and returns the parsed result converted with #to_s.
  #
  # @param html [String] markup to sanitize
  # @return [String] string form of the parsed fragment
  def sanitize(html)
    parser_options = {
      :tokenizer => HTMLSanitizer,
      :encoding  => 'utf-8',
      :tree      => TreeBuilders::REXML::TreeBuilder
    }

    fragment = HTMLParser.parse_fragment(html, parser_options)
    fragment.to_s
  end
end
 
# Download one page up front so every sanitizer is timed against the same
# input and the network fetch is not part of any measurement.
# (URI#read is provided by open-uri, required at the top of the file.)
uri = URI.parse('http://www.slashdot.com/')
content = uri.read

# Number of sanitize calls timed for each library.
N = 100

# Benchmark.bm defaults to a label width of 0, which leaves the timing
# columns misaligned with the caption and with each other. The longest label
# here is 10 characters, so reserve 11 columns for labels.
Benchmark.bm(11) do |x|
  # Dryopteris: module-level entry point.
  x.report('Dryopteris') do
    N.times do
      Dryopteris.sanitize(content)
    end
  end

  # ActionView: helper methods mixed into RailsSanitize.
  x.report('ActionView') do
    sanitizer = RailsSanitize.new

    N.times do
      sanitizer.sanitize(content)
    end
  end

  # Sanitize gem, using its RELAXED configuration.
  x.report('Sanitize') do
    N.times do
      Sanitize.clean(content, Sanitize::Config::RELAXED)
    end
  end

  # html5lib, via the HTML5libSanitize adapter.
  x.report('HTML5lib') do
    sanitizer = HTML5libSanitize.new

    N.times do
      sanitizer.sanitize(content)
    end
  end
end