public
Fork of halorgium/mephisto
Description: A mirror of the mephisto code-base
Homepage: http://mephistoblog.com/
Clone URL: git://github.com/technoweenie/mephisto.git
Click here to lend your support to mephisto and make a donation at www.pledgie.com!
update white list plugin

git-svn-id: http://svn.techno-weenie.net/projects/mephisto/trunk@2517 
567b1171-46fb-0310-a4c9-b4bef9110e78
technoweenie (author)
Mon Nov 27 10:32:27 -0800 2006
commit  7e36c99b3ac7ec9a65b993ad583ea06c5de52470
tree    d962b5aac8aedc46c7a9987c270d66ce1abc0fc7
parent  92cd3309fbb3e7ea0f40c6b9dcdab3e2b69ee414
...
10
11
12
13
 
14
15
 
16
17
 
18
19
 
20
21
 
22
23
 
24
25
 
 
 
26
27
28
29
30
31
32
33
34
35
36
 
37
...
10
11
12
 
13
14
 
15
16
 
17
18
 
19
20
 
21
22
 
23
24
 
25
26
27
28
 
 
 
 
 
 
 
 
 
29
30
31
0
@@ -10,26 +10,20 @@ the extensive test suite.
0
 
0
 You can add or remove tags/attributes if you want to customize it a bit.
0
 
0
-add table tags
0
+Add table tags
0
   
0
- WhiteListHelper.tags += %w(table td th)
0
+ WhiteListHelper.tags.merge %w(table td th)
0
 
0
-remove tags
0
+Remove tags
0
   
0
- WhiteListHelper.tags -= %w(div span)
0
+ WhiteListHelper.tags.delete 'div'
0
 
0
-clear any attributes that are allowed for <a> tags
0
+Change allowed attributes
0
 
0
- WhiteListHelper.attributes['a'] = []
0
+ WhiteListHelper.attributes.merge %w(id class style)
0
 
0
-allow a new attribute for the <img> tag
0
+white_list accepts a block for custom tag escaping. Shown below is the default block that white_list uses if none is given.
0
+The block is called for all bad tags, and every text node. node is an instance of HTML::Node (either HTML::Tag or HTML::Text).
0
+bad is nil for text nodes inside good tags, or is the tag name of the bad tag.
0
 
0
- WhiteListHelper.attributes['img'] += %w(style)
0
-
0
-add new tag with attributes
0
-
0
- WhiteListHelper.attributes['table'] = %w(cellpadding cellspacing)
0
-
0
-change allowed attributes for all tags
0
-
0
- WhiteListHelper.attributes[nil] = %w(id class style)
0
\ No newline at end of file
0
+ <%= white_list(@article.body) { |node, bad| white_listed_bad_tags.include?(bad) ? nil : node.to_s.gsub(/</, '&lt;') } %>
0
\ No newline at end of file
...
1
2
 
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
 
 
 
 
 
22
23
24
25
26
27
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
30
31
 
 
 
32
33
34
 
35
36
37
38
39
40
 
 
 
41
 
42
43
44
45
 
46
47
48
 
49
50
51
 
52
53
54
55
56
57
 
58
59
60
61
62
 
 
 
 
 
 
63
...
1
 
2
3
 
 
 
 
 
 
 
 
 
 
 
4
 
 
 
 
 
 
5
6
7
8
9
10
11
 
 
 
 
 
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
 
 
41
42
43
44
45
 
46
47
48
49
50
 
 
51
52
53
54
55
56
 
57
 
58
59
60
 
61
62
63
 
64
65
66
67
68
69
 
70
71
72
73
 
74
75
76
77
78
79
80
81
0
@@ -1,61 +1,79 @@
0
 module WhiteListHelper
0
- PROTOCOL_ATTRIBUTES = %w(src href)
0
+ PROTOCOL_ATTRIBUTES = Set.new %w(src href)
0
   PROTOCOL_SEPARATOR = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/
0
- mattr_reader :tags, :attributes, :protocols
0
- @@tags = %w(strong em b i p code pre tt output samp kbd var sub sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr acronym)
0
- @@attributes = {
0
- 'a' => %w(href),
0
- 'img' => %w(src width height alt),
0
- 'blockquote' => %w(cite),
0
- 'del' => %w(cite datetime),
0
- 'ins' => %w(cite datetime),
0
- nil => %w(title class) }
0
- @@protocols = %w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed)
0
- tags.push(*attributes.keys).uniq!
0
 
0
- def white_listed_tags
0
- ::WhiteListHelper.tags
0
- end
0
-
0
- def white_listed_attributes
0
- ::WhiteListHelper.attributes
0
+ [:bad_tags, :tags, :attributes, :protocols].each do |attr|
0
+ klass = class << self; self; end
0
+ klass.send(:define_method, "#{attr}=") { |value| class_variable_set("@@#{attr}", Set.new(value)) }
0
+ define_method("white_listed_#{attr}") { ::WhiteListHelper.send(attr) }
0
+ mattr_reader attr
0
   end
0
 
0
- def white_listed_protocols
0
- ::WhiteListHelper.protocols
0
- end
0
-
0
- def white_list(html, options = {})
0
+ # This White Listing helper will html encode all tags and strip all attributes that aren't specifically allowed.
0
+ # It also strips href/src attributes with invalid protocols, especially javascript:. It does its best to counter any
0
+ # tricks that hackers may use, like throwing in unicode/ascii/hex values to get past the javascript: filters. Check out
0
+ # the extensive test suite.
0
+ #
0
+ # <%= white_list @article.body %>
0
+ #
0
+ # You can add or remove tags/attributes if you want to customize it a bit.
0
+ #
0
+ # Add table tags
0
+ #
0
+ # WhiteListHelper.tags.merge %w(table td th)
0
+ #
0
+ # Remove tags
0
+ #
0
+ # WhiteListHelper.tags.delete 'div'
0
+ #
0
+ # Change allowed attributes
0
+ #
0
+ # WhiteListHelper.attributes.merge %w(id class style)
0
+ #
0
+ # white_list accepts a block for custom tag escaping. Shown below is the default block that white_list uses if none is given.
0
+ # The block is called for all bad tags, and every text node. node is an instance of HTML::Node (either HTML::Tag or HTML::Text).
0
+ # bad is nil for text nodes inside good tags, or is the tag name of the bad tag.
0
+ #
0
+ # <%= white_list(@article.body) { |node, bad| white_listed_bad_tags.include?(bad) ? nil : node.to_s.gsub(/</, '&lt;') } %>
0
+ #
0
+ def white_list(html, options = {}, &block)
0
     return html if html.blank? || !html.include?('<')
0
- (options[:attributes] ||= {}).update(white_listed_attributes)
0
- (options[:tags] ||= []).push(*options[:attributes].keys).push(*white_listed_tags).uniq!
0
+ attrs = Set.new(options[:attributes]).merge(white_listed_attributes)
0
+ tags = Set.new(options[:tags] ).merge(white_listed_tags)
0
+ block ||= lambda { |node, bad| white_listed_bad_tags.include?(bad) ? nil : node.to_s.gsub(/</, '&lt;') }
0
     returning [] do |new_text|
0
       tokenizer = HTML::Tokenizer.new(html)
0
-
0
+ bad = nil
0
       while token = tokenizer.next
0
         node = HTML::Node.parse(nil, 0, 0, token, false)
0
         new_text << case node
0
           when HTML::Tag
0
- unless (options[:tags]).include?(node.name)
0
- node.to_s.gsub(/</, "&lt;")
0
+ unless tags.include?(node.name)
0
+ bad = node.name
0
+ block.call node, bad
0
             else
0
+ bad = nil
0
               if node.closing != :close
0
- attributes = (options[:attributes][nil] || []).push(*(options[:attributes][node.name] || []))
0
                 node.attributes.delete_if do |attr_name, value|
0
- !attributes.include?(attr_name) || (PROTOCOL_ATTRIBUTES.include?(attr_name) && contains_bad_protocols?(value))
0
+ !attrs.include?(attr_name) || (PROTOCOL_ATTRIBUTES.include?(attr_name) && contains_bad_protocols?(value))
0
                 end if attributes.any?
0
               end
0
- node.to_s
0
+ node
0
             end
0
           else
0
- node.to_s.gsub(/</, "&lt;")
0
+ block.call node, bad
0
         end
0
       end
0
     end.join
0
   end
0
   
0
- private
0
+ protected
0
     def contains_bad_protocols?(value)
0
       value =~ PROTOCOL_SEPARATOR && !white_listed_protocols.include?(value.split(PROTOCOL_SEPARATOR).first)
0
     end
0
-end
0
\ No newline at end of file
0
+end
0
+
0
+WhiteListHelper.bad_tags = %w(script)
0
+WhiteListHelper.tags = %w(strong em b i p code pre tt output samp kbd var sub sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr acronym a img blockquote del ins fieldset legend)
0
+WhiteListHelper.attributes = %w(href src width height alt cite datetime title class)
0
+WhiteListHelper.protocols = %w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed)
0
\ No newline at end of file
...
5
6
7
8
 
9
10
11
12
13
14
15
 
16
17
18
 
19
20
21
...
37
38
39
40
 
41
42
43
...
64
65
66
67
 
68
69
70
...
92
93
94
95
 
96
97
98
99
 
100
101
102
103
104
 
105
106
107
...
112
113
114
115
 
 
 
 
 
 
 
 
 
116
117
118
...
5
6
7
 
8
9
10
11
12
13
14
 
15
16
17
 
18
19
20
21
...
37
38
39
 
40
41
42
43
...
64
65
66
 
67
68
69
70
...
92
93
94
 
95
96
97
98
 
99
100
101
102
103
 
104
105
106
107
...
112
113
114
 
115
116
117
118
119
120
121
122
123
124
125
126
0
@@ -5,17 +5,17 @@ class WhiteListTest < Test::Unit::TestCase
0
   include WhiteListHelper
0
   public :contains_bad_protocols?
0
 
0
- (WhiteListHelper.tags + WhiteListHelper.attributes.keys).compact.each do |tag_name|
0
+ WhiteListHelper.tags.each do |tag_name|
0
     define_method "test_should_allow_#{tag_name}_tag" do
0
       assert_white_listed "start <#{tag_name} title=\"1\" name=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", "start <#{tag_name} title='1'>foo &lt;bad>bar&lt;/bad> baz</#{tag_name}> end"
0
     end
0
   end
0
 
0
   def test_should_allow_anchors
0
- assert_white_listed %(<a href="foo" onclick="bar"><script>baz</script></a>), "<a href='foo'>&lt;script>baz&lt;/script></a>"
0
+ assert_white_listed %(<a href="foo" onclick="bar"><script>baz</script></a>), "<a href='foo'></a>"
0
   end
0
 
0
- WhiteListHelper.attributes['img'].each do |img_attr|
0
+ %w(src width height alt).each do |img_attr|
0
     define_method "test_should_allow_image_#{img_attr}_attribute" do
0
       assert_white_listed %(<img #{img_attr}="foo" onclick="bar" />), "<img #{img_attr}='foo' />"
0
     end
0
@@ -37,7 +37,7 @@ class WhiteListTest < Test::Unit::TestCase
0
 
0
   def test_should_allow_custom_tags_with_attributes
0
     text = "<fieldset foo='bar'>foo</fieldset>"
0
- assert_equal(text, white_list(text, :attributes => {'fieldset' => %w(foo)}))
0
+ assert_equal(text, white_list(text, :attributes => ['foo']))
0
   end
0
 
0
   [%w(img src), %w(a href)].each do |(tag, attr)|
0
@@ -64,7 +64,7 @@ class WhiteListTest < Test::Unit::TestCase
0
   end
0
 
0
   def test_should_block_script_tag
0
- assert_white_listed %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), "&lt;script src='http:' />&lt;/script>"
0
+ assert_white_listed %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
0
   end
0
 
0
   [%(<IMG SRC="javascript:alert('XSS');">),
0
@@ -92,16 +92,16 @@ class WhiteListTest < Test::Unit::TestCase
0
   end
0
   
0
   def test_should_sanitize_invalid_script_tag
0
- assert_white_listed %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), "&lt;script />&lt;/script>"
0
+ assert_white_listed %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
0
   end
0
   
0
   def test_should_sanitize_script_tag_with_multiple_open_brackets
0
- assert_white_listed %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;&lt;script>alert(\"XSS\");//&lt;&lt;/script>"
0
+ assert_white_listed %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;"
0
     assert_white_listed %(<iframe src=http://ha.ckers.org/scriptlet.html\n<), "&lt;iframe src='http:' />&lt;"
0
   end
0
   
0
   def test_should_sanitize_unclosed_script
0
- assert_white_listed %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "&lt;script src='http:' /><b>"
0
+ assert_white_listed %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>"
0
   end
0
   
0
   def test_should_sanitize_half_open_scripts
0
@@ -112,7 +112,15 @@ class WhiteListTest < Test::Unit::TestCase
0
     img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
0
     assert_white_listed img_hack, "<img>"
0
   end
0
-
0
+
0
+ def test_should_allow_custom_block
0
+ html = %(<SCRIPT type="javascript">foo</SCRIPT><img>blah</img><blink>blah</blink>)
0
+ safe = white_list html do |node, bad|
0
+ bad == 'script' ? nil : node
0
+ end
0
+ assert_equal "<img>blah</img><blink>blah</blink>", safe
0
+ end
0
+
0
   protected
0
     def assert_white_listed(text, expected = nil)
0
       assert_equal((expected || text), white_list(text))

Comments

    No one has commented yet.