From a11498de9e283cd457b35ee252983662f7452aa9 Mon Sep 17 00:00:00 2001 From: Ryan Grove Date: Mon, 15 Jun 2020 14:27:07 -0700 Subject: [PATCH] Fix sanitization bypass in HTML foreign content https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m --- README.md | 11 +++++++++++ lib/sanitize/config/default.rb | 2 +- test/test_clean_element.rb | 30 ++++++++++++++++++++---------- test/test_malicious_html.rb | 13 +++++++++++++ 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 86cd366..d867e4a 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,11 @@ Sanitize can sanitize the following types of input: * Standalone CSS stylesheets * Standalone CSS properties +However, please note that Sanitize _cannot_ fully sanitize the contents of +`` or `` elements, since these elements don't follow the same parsing +rules as the rest of HTML. If this is something you need, you may want to look +for another solution. + ### HTML Fragments A fragment is a snippet of HTML that doesn't contain a root-level `` @@ -415,6 +420,12 @@ elements not in this array will be removed. ] ``` +**Warning:** Sanitize cannot fully sanitize the contents of `` or `` +elements, since these elements don't follow the same parsing rules as the rest +of HTML. If you add `math` or `svg` to the allowlist, you must assume that any +content inside them will be allowed, even if that content would otherwise be +removed by Sanitize. + #### :parser_options (Hash) [Parsing options](https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options) supplied to `nokogumbo`. diff --git a/lib/sanitize/config/default.rb b/lib/sanitize/config/default.rb index 9cee03e..4dd1d22 100644 --- a/lib/sanitize/config/default.rb +++ b/lib/sanitize/config/default.rb @@ -74,7 +74,7 @@ module Config # the specified elements (when filtered) will be removed, and the contents # of all other filtered elements will be left behind. :remove_contents => %w[ - iframe noembed noframes noscript script style + iframe math noembed noframes noscript plaintext script style svg xmp ], # Transformers allow you to filter or alter nodes using custom logic. See diff --git a/test/test_clean_element.rb b/test/test_clean_element.rb index ba0c030..37a8af0 100644 --- a/test/test_clean_element.rb +++ b/test/test_clean_element.rb @@ -192,21 +192,16 @@ .must_equal '' end - it 'should escape the content of removed `plaintext` elements' do - Sanitize.fragment('hello! ') - .must_equal 'hello! <script>alert(0)</script>' - end - - it 'should escape the content of removed `xmp` elements' do - Sanitize.fragment('hello! ') - .must_equal 'hello! <script>alert(0)</script>' - end - it 'should not preserve the content of removed `iframe` elements' do Sanitize.fragment('') .must_equal '' end + it 'should not preserve the content of removed `math` elements' do + Sanitize.fragment('hello! ') + .must_equal '' + end + it 'should not preserve the content of removed `noembed` elements' do Sanitize.fragment('hello! ') .must_equal '' @@ -222,6 +217,11 @@ .must_equal '' end + it 'should not preserve the content of removed `plaintext` elements' do + Sanitize.fragment('hello! ') + .must_equal '' + end + it 'should not preserve the content of removed `script` elements' do Sanitize.fragment('') .must_equal '' @@ -232,6 +232,16 @@ .must_equal '' end + it 'should not preserve the content of removed `svg` elements' do + Sanitize.fragment('hello! ') + .must_equal '' + end + + it 'should not preserve the content of removed `xmp` elements' do + Sanitize.fragment('hello! ') + .must_equal '' + end + strings.each do |name, data| it "should clean #{name} HTML" do Sanitize.fragment(data[:html]).must_equal(data[:default]) diff --git a/test/test_malicious_html.rb b/test/test_malicious_html.rb index 945978f..39163b9 100644 --- a/test/test_malicious_html.rb +++ b/test/test_malicious_html.rb @@ -219,4 +219,17 @@ end end end + + # https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m + describe 'foreign content bypass in relaxed config' do + it 'prevents a sanitization bypass via carefully crafted foreign content' do + %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name| + @s.fragment(%[<#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]). + must_equal '' + + @s.fragment(%[<#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]). + must_equal '' + end + end + end end