From 959e259c19a5da4e82fb33bbd1ec6be6900110dc Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 01:35:42 -0600 Subject: [PATCH 1/7] Swap out simple-xss for sanitize-html Advantages: * **Works** unlike simple-xss * Lots of maintainers/contributors * Smaller than simple-xss * Correctly allows sanitizing text **first** then applying markdown * Optional list is included for current defaults... this **can be removed** but I would feel safer matching it in case something comes down the line we don't like. * Does filter out `javascript:` items just like xss [npm homepage](https://www.npmjs.org/package/sanitize-html) [gh homepage](https://github.com/punkave/sanitize-html) Related: * #168 * #125 Tested in dev on [this page](http://localhost:8080/scripts/marti/httplocalhost.localdomain/RFC_2606%C2%A73_-_license_and_licence_Unit_Test) --- libs/markdown.js | 68 +++++++++++++++++++++++++++++++++++++++++++++--- package.json | 2 +- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/libs/markdown.js b/libs/markdown.js index a911a1777..6d2c0a3ee 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -1,6 +1,6 @@ var marked = require('marked'); var hljs = require('highlight.js'); -var xss = require('simple-xss'); +var sanitizeHtml = require('sanitize-html'); var renderer = new marked.Renderer(); // Automatically generate an anchor for each header @@ -33,11 +33,73 @@ marked.setOptions({ tables: true, breaks: true, pedantic: false, - sanitize: false, // we use xss to sanitize HTML + sanitize: false, // we use sanitize-html to sanitize HTML smartLists: true, smartypants: false }); exports.renderMd = function (text) { - return xss(marked(text)); + return marked(sanitizeHtml(text), { + allowedTags: [ + 'h3', + 'h4', + 'h5', + 'h6', + 'blockquote', + 'p', + 'a', + 'ul', + 'ol', + 'nl', + 'li', + 'b', + 'i', + 'strong', + 'em', + 'strike', + 'code', + 'hr', + 'br', + 'div', + 'table', + 'thead', + 'caption', + 'tbody', + 'tr', + 'th', + 'td', + 'pre' + ], + allowedAttributes: { + a: [ + 'href', + 'name', + 'target' + ], + // We don't currently allow img itself by default, but this + // would make sense if we did + img: [ + 'src' + ] + }, + // Lots of these won't come up by default because we don't allow them + selfClosing: [ + 'img', + 'br', + 'hr', + 'area', + 'base', + 'basefont', + 'input', + 'link', + 'meta' + ], + // URL schemes we permit + allowedSchemes: [ + 'http', + 'https', + 'ftp', + 'mailto' + ] + }); }; diff --git a/package.json b/package.json index 89fc3bfdf..62736dfd5 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ "async": "*", "aws-sdk": "*", "toobusy-js": "*", - "simple-xss": "*", + "sanitize-html": "*", "underscore": "*", "moment": "*", "github": "*", From fafa04c97d621707d3caa8746b2215825220ecda Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 02:36:06 -0600 Subject: [PATCH 2/7] Flip order of processing and allow img tag * Swap to markdown first then sanitize-html to allow html code blocks * Allow img tag ... this deviates from their defaults so we need to define our own. --- libs/markdown.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/markdown.js b/libs/markdown.js index 6d2c0a3ee..5c34954f0 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -39,7 +39,7 @@ marked.setOptions({ }); exports.renderMd = function (text) { - return marked(sanitizeHtml(text), { + return sanitizeHtml(marked(text), { allowedTags: [ 'h3', 'h4', @@ -68,7 +68,8 @@ exports.renderMd = function (text) { 'tr', 'th', 'td', - 'pre' + 'pre', + 'img' ], allowedAttributes: { a: [ @@ -76,10 +77,10 @@ exports.renderMd = function (text) { 'name', 'target' ], - // We don't currently allow img itself by default, but this - // would make sense if we did img: [ - 'src' + 'src', + 'alt', + 'title' ] }, // Lots of these won't come up by default because we don't allow them From 7097881d1ac9b9fd43f0a2fec77bfc14640edb43 Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 02:59:27 -0600 Subject: [PATCH 3/7] Revert last commit... otherwise DOM is stripped of highlighting capabilities even though it doesn't currently work --- libs/markdown.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/libs/markdown.js b/libs/markdown.js index 5c34954f0..3b8f1d48f 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -39,7 +39,7 @@ marked.setOptions({ }); exports.renderMd = function (text) { - return sanitizeHtml(marked(text), { + return marked(sanitizeHtml(text), { allowedTags: [ 'h3', 'h4', @@ -68,8 +68,7 @@ exports.renderMd = function (text) { 'tr', 'th', 'td', - 'pre', - 'img' + 'pre' ], allowedAttributes: { a: [ @@ -78,9 +77,7 @@ exports.renderMd = function (text) { 'target' ], img: [ - 'src', - 'alt', - 'title' + 'src' ] }, // Lots of these won't come up by default because we don't allow them From c31e8db5be29947a6e86f8dc8b8f8417cc8f50b8 Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 03:10:36 -0600 Subject: [PATCH 4/7] Match some of [GH's](https://github.com/github/markup#html-sanitization) stuff. * Still need to see if sanitize-html has a wildcard --- libs/markdown.js | 51 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/libs/markdown.js b/libs/markdown.js index 3b8f1d48f..67e40c176 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -41,43 +41,61 @@ marked.setOptions({ exports.renderMd = function (text) { return marked(sanitizeHtml(text), { allowedTags: [ + 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', - 'blockquote', + 'h7', + 'h8', 'p', - 'a', - 'ul', - 'ol', - 'nl', - 'li', + 'div', + 'blockquote', + 'pre', 'b', 'i', 'strong', 'em', - 'strike', + 'tt', 'code', - 'hr', - 'br', - 'div', + 'ins', + 'del', + 'sup', + 'sub', + 'kbd', + 'samp', + 'q', + 'var', + 'ol', + 'ul', + 'li', + 'dl', + 'dt', + 'dd', 'table', 'thead', - 'caption', 'tbody', + 'tfoot', 'tr', - 'th', 'td', - 'pre' + 'th', + 'br', + 'hr', + 'ruby', + 'rt', + 'rp' ], allowedAttributes: { a: [ - 'href', - 'name', - 'target' + 'href' ], img: [ 'src' + ], + div: [ + 'itemscope', + 'itemtype' ] }, // Lots of these won't come up by default because we don't allow them @@ -96,7 +114,6 @@ exports.renderMd = function (text) { allowedSchemes: [ 'http', 'https', - 'ftp', 'mailto' ] }); From 2b5e93de81b81b3e5c49aa5e66cbed0ce8d7c4c2 Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 03:47:41 -0600 Subject: [PATCH 5/7] Move html whitelist to a json file so it doesn't clutter up the main code and read in --- libs/htmlwhitelist.json | 76 ++++++++++++++++++++++++++++++++++++++ libs/markdown.js | 82 ++--------------------------------------- 2 files changed, 80 insertions(+), 78 deletions(-) create mode 100644 libs/htmlwhitelist.json diff --git a/libs/htmlwhitelist.json b/libs/htmlwhitelist.json new file mode 100644 index 000000000..1093c91ab --- /dev/null +++ b/libs/htmlwhitelist.json @@ -0,0 +1,76 @@ +{ + "allowedTags": [ + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "h7", + "h8", + "p", + "div", + "blockquote", + "pre", + "b", + "i", + "strong", + "em", + "tt", + "code", + "ins", + "del", + "sup", + "sub", + "kbd", + "samp", + "q", + "var", + "ol", + "ul", + "li", + "dl", + "dt", + "dd", + "table", + "thead", + "tbody", + "tfoot", + "tr", + "td", + "th", + "br", + "hr", + "ruby", + "rt", + "rp" + ], + "allowedAttributes": { + "a": [ + "href" + ], + "img": [ + "src" + ], + "div": [ + "itemscope", + "itemtype" + ] + }, + "selfClosing": [ + "img", + "br", + "hr", + "area", + "base", + "basefont", + "input", + "link", + "meta" + ], + "allowedSchemes": [ + "http", + "https", + "mailto" + ] +} diff --git a/libs/markdown.js b/libs/markdown.js index 67e40c176..bf4018f4f 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -1,6 +1,8 @@ var marked = require('marked'); var hljs = require('highlight.js'); var sanitizeHtml = require('sanitize-html'); +var htmlWhitelist = require('./htmlwhitelist.json'); + var renderer = new marked.Renderer(); // Automatically generate an anchor for each header @@ -39,82 +41,6 @@ marked.setOptions({ }); exports.renderMd = function (text) { - return marked(sanitizeHtml(text), { - allowedTags: [ - 'h1', - 'h2', - 'h3', - 'h4', - 'h5', - 'h6', - 'h7', - 'h8', - 'p', - 'div', - 'blockquote', - 'pre', - 'b', - 'i', - 'strong', - 'em', - 'tt', - 'code', - 'ins', - 'del', - 'sup', - 'sub', - 'kbd', - 'samp', - 'q', - 'var', - 'ol', - 'ul', - 'li', - 'dl', - 'dt', - 'dd', - 'table', - 'thead', - 'tbody', - 'tfoot', - 'tr', - 'td', - 'th', - 'br', - 'hr', - 'ruby', - 'rt', - 'rp' - ], - allowedAttributes: { - a: [ - 'href' - ], - img: [ - 'src' - ], - div: [ - 'itemscope', - 'itemtype' - ] - }, - // Lots of these won't come up by default because we don't allow them - selfClosing: [ - 'img', - 'br', - 'hr', - 'area', - 'base', - 'basefont', - 'input', - 'link', - 'meta' - ], - // URL schemes we permit - allowedSchemes: [ - 'http', - 'https', - 'mailto' - ] - }); + console.log(htmlWhitelist.allowedTags); + return marked(sanitizeHtml(text), htmlWhitelist); }; From 6891f97dcdd6c895aed792bdce0542c25fcda67b Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 05:12:31 -0600 Subject: [PATCH 6/7] White space adjustment and removal of console message. * Prepping for when I get back --- libs/markdown.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/markdown.js b/libs/markdown.js index bf4018f4f..349d1e0c6 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -2,7 +2,6 @@ var marked = require('marked'); var hljs = require('highlight.js'); var sanitizeHtml = require('sanitize-html'); var htmlWhitelist = require('./htmlwhitelist.json'); - var renderer = new marked.Renderer(); // Automatically generate an anchor for each header @@ -41,6 +40,5 @@ marked.setOptions({ }); exports.renderMd = function (text) { - console.log(htmlWhitelist.allowedTags); return marked(sanitizeHtml(text), htmlWhitelist); }; From 41c359fb156c6b909b7cc52787bf6c69be758457 Mon Sep 17 00:00:00 2001 From: Martii Date: Fri, 20 Jun 2014 12:33:46 -0600 Subject: [PATCH 7/7] Rename json to be more specific and change affected code * Gotta run now --- libs/{htmlwhitelist.json => htmlWhitelistPost.json} | 0 libs/markdown.js | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename libs/{htmlwhitelist.json => htmlWhitelistPost.json} (100%) diff --git a/libs/htmlwhitelist.json b/libs/htmlWhitelistPost.json similarity index 100% rename from libs/htmlwhitelist.json rename to libs/htmlWhitelistPost.json diff --git a/libs/markdown.js b/libs/markdown.js index 349d1e0c6..d60a8abaf 100644 --- a/libs/markdown.js +++ b/libs/markdown.js @@ -1,7 +1,7 @@ var marked = require('marked'); var hljs = require('highlight.js'); var sanitizeHtml = require('sanitize-html'); -var htmlWhitelist = require('./htmlwhitelist.json'); +var htmlWhitelistPost = require('./htmlWhitelistPost.json'); var renderer = new marked.Renderer(); // Automatically generate an anchor for each header @@ -40,5 +40,5 @@ marked.setOptions({ }); exports.renderMd = function (text) { - return marked(sanitizeHtml(text), htmlWhitelist); + return marked(sanitizeHtml(text), htmlWhitelistPost); };