braintree · cgdibble · Mar 1, 2022 · Feb 22, 2022 · sshropshire · Feb 22, 2022
@@ -1,3 +1,23 @@
+# unreleased
+
+**Breaking Changes**
+
+- Automatically decode HTML character entities before sanitizing; left encoded, they could result in an XSS vulnerability when links are rendered in a server-rendered HTML file
+
+```js
+// decodes to javascript:alert('XSS')
+const vulnerableUrl =
+  "&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041";
+
+sanitizeUrl(vulnerableUrl); // 'about:blank'
+
+const okUrl = "https://example.com/" + vulnerableUrl;
+
+// since the javascript bit is in the path instead of the protocol
+// this is successfully sanitized
+sanitizeUrl(okUrl); // "https://example.com/javascript:alert('XSS')"
+```
+
 # 5.0.2
 
 - Fix issue where certain invisible white space characters were not being sanitized (#35)

@@ -15,8 +15,15 @@ sanitizeUrl("https://example.com"); // 'https://example.com'
 sanitizeUrl("http://example.com"); // 'http://example.com'
 sanitizeUrl("www.example.com"); // 'www.example.com'
 sanitizeUrl("mailto:hello@example.com"); // 'mailto:hello@example.com'
+sanitizeUrl(
+  "&#104;&#116;&#116;&#112;&#115;&#0000058//&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109;"
+); // 'https://example.com'
 
 sanitizeUrl("javascript:alert(document.domain)"); // 'about:blank'
 sanitizeUrl("jAvasCrIPT:alert(document.domain)"); // 'about:blank'
 sanitizeUrl(decodeURIComponent("JaVaScRiP%0at:alert(document.domain)")); // 'about:blank'
+// HTML encoded javascript:alert('XSS')
+sanitizeUrl(
+  "&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041"
+); // 'about:blank'
 ```
@@ -92,6 +92,30 @@ describe("sanitizeUrl", () => {
     );
   });
 
+  it("decodes html entities", () => {
+    // Each payload is an HTML-encoded or entity-obfuscated form of
+    // javascript:alert('XSS') and must be rejected.
+    const encodedPayloads = [
+      "&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041",
+      "&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;",
+      "&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29",
+      "jav&#x09;ascript:alert('XSS');",
+      " &#14; javascript:alert('XSS');",
+    ];
+
+    for (const payload of encodedPayloads) {
+      expect(sanitizeUrl(payload)).toBe("about:blank");
+    }
+
+    // Encoded form of https://example.com/javascript:alert('XSS'). The
+    // javascript: text sits in the URL path rather than the protocol, so the
+    // decoded URL is returned instead of about:blank.
+    const encodedSafeUrl =
+      "&#104;&#116;&#116;&#112;&#115;&#0000058//&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109;/&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041";
+    const decodedSafeUrl = "https://example.com/javascript:alert('XSS')";
+
+    expect(sanitizeUrl(encodedSafeUrl)).toBe(decodedSafeUrl);
+  });
+
   describe("invalid protocols", () => {
     describe.each(["javascript", "data", "vbscript"])("%s", (protocol) => {
       it(`replaces ${protocol} urls with about:blank`, () => {

@@ -1,4 +1,5 @@
 const invalidProtocolRegex = /^([^\w]*)(javascript|data|vbscript)/im;
+const htmlEntitiesRegex = /&#(\w+)(^\w|;)?/g;
 const ctrlCharactersRegex =
   /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim;
 const urlSchemeRegex = /^([^:]+):/gm;
@@ -8,13 +9,22 @@ function isRelativeUrlWithoutProtocol(url: string): boolean {
   return relativeFirstCharacters.indexOf(url[0]) > -1;
 }
 
+/**
+ * Decodes numeric HTML character references in `str`.
+ *
+ * Adapted from https://stackoverflow.com/a/29824550/2601552
+ *
+ * Hexadecimal references (`&#x6A;` / `&#X6A;`) are parsed as base 16. Every
+ * other capture is handed to `String.fromCharCode` unchanged, so decimal
+ * references with or without leading zeros (`&#106;`, `&#0000106`) decode to
+ * their character and a capture that is not numeric still becomes U+0000.
+ *
+ * @param str - Text that may contain `&#…` references.
+ * @returns `str` with every match of `htmlEntitiesRegex` replaced by a single
+ *   UTF-16 code unit.
+ */
+function decodeHtmlCharacters(str: string) {
+  const hexReference = /^[xX][0-9a-fA-F]+$/;
+
+  return str.replace(htmlEntitiesRegex, (match, dec) => {
+    // Without this branch "x6A" coerces to NaN and the reference decodes to
+    // U+0000 instead of "j".
+    const codeUnit = hexReference.test(dec) ? parseInt(dec.slice(1), 16) : dec;
+
+    return String.fromCharCode(codeUnit);
+  });
+}
+
 export function sanitizeUrl(url?: string): string {
-  if (!url) {
+  const sanitizedUrl = decodeHtmlCharacters(url || "")
+    .replace(ctrlCharactersRegex, "")
+    .trim();
+
+  if (!sanitizedUrl) {
     return "about:blank";
   }
 
-  const sanitizedUrl = url.replace(ctrlCharactersRegex, "").trim();
-
   if (isRelativeUrlWithoutProtocol(sanitizedUrl)) {
     return sanitizedUrl;
   }