Skip to content

Commit

Permalink
feat: decode html entities before sanitizing (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
crookedneighbor committed Mar 1, 2022
1 parent abff5b1 commit 8f7371c
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 3 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,23 @@
# unreleased

**Breaking Changes**

- Decode HTML characters automatically that would result in an XSS vulnerability when rendering links via a server rendered HTML file

```js
// decodes to javacript:alert('XSS')
const vulnerableUrl =
"&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041";

sanitizeUrl(vulnerableUrl); // 'about:blank'

const okUrl = "https://example.com/" + vulnerableUrl;

// since the javascript bit is in the path instead of the protocol
// this is successfully sanitized
sanitizeUrl(okUrl); // 'https://example.com/javascript:alert('XSS');
```

# 5.0.2

- Fix issue where certain invisible white space characters were not being sanitized (#35)
Expand Down
7 changes: 7 additions & 0 deletions README.md
Expand Up @@ -15,8 +15,15 @@ sanitizeUrl("https://example.com"); // 'https://example.com'
sanitizeUrl("http://example.com"); // 'http://example.com'
sanitizeUrl("www.example.com"); // 'www.example.com'
sanitizeUrl("mailto:hello@example.com"); // 'mailto:hello@example.com'
sanitizeUrl(
"https&#0000058//example.com"
); // https://example.com

sanitizeUrl("javascript:alert(document.domain)"); // 'about:blank'
sanitizeUrl("jAvasCrIPT:alert(document.domain)"); // 'about:blank'
sanitizeUrl(decodeURIComponent("JaVaScRiP%0at:alert(document.domain)")); // 'about:blank'
// HTML encoded javascript:alert('XSS')
sanitizeUrl(
"&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041"
); // 'about:blank'
```
24 changes: 24 additions & 0 deletions src/__tests__/test.ts
Expand Up @@ -92,6 +92,30 @@ describe("sanitizeUrl", () => {
);
});

it("decodes html entities", () => {
// all these decode to javascript:alert('xss');
const attackVectors = [
"&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041",
"javascript:alert('XSS')",
"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29",
"jav	ascript:alert('XSS');",
"  javascript:alert('XSS');",
];

attackVectors.forEach((vector) => {
expect(sanitizeUrl(vector)).toBe("about:blank");
});

// https://example.com/javascript:alert('XSS')
// since the javascript is the url path, and not the protocol,
// this url is technically sanitized
expect(
sanitizeUrl(
"https&#0000058//example.com/&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041"
)
).toBe("https://example.com/javascript:alert('XSS')");
});

describe("invalid protocols", () => {
describe.each(["javascript", "data", "vbscript"])("%s", (protocol) => {
it(`replaces ${protocol} urls with about:blank`, () => {
Expand Down
16 changes: 13 additions & 3 deletions src/index.ts
@@ -1,4 +1,5 @@
const invalidProtocolRegex = /^([^\w]*)(javascript|data|vbscript)/im;
const htmlEntitiesRegex = /&#(\w+)(^\w|;)?/g;
const ctrlCharactersRegex =
/[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim;
const urlSchemeRegex = /^([^:]+):/gm;
Expand All @@ -8,13 +9,22 @@ function isRelativeUrlWithoutProtocol(url: string): boolean {
return relativeFirstCharacters.indexOf(url[0]) > -1;
}

// adapted from https://stackoverflow.com/a/29824550/2601552
function decodeHtmlCharacters(str: string) {
return str.replace(htmlEntitiesRegex, (match, dec) => {
return String.fromCharCode(dec);
});
}

export function sanitizeUrl(url?: string): string {
if (!url) {
const sanitizedUrl = decodeHtmlCharacters(url || "")
.replace(ctrlCharactersRegex, "")
.trim();

if (!sanitizedUrl) {
return "about:blank";
}

const sanitizedUrl = url.replace(ctrlCharactersRegex, "").trim();

if (isRelativeUrlWithoutProtocol(sanitizedUrl)) {
return sanitizedUrl;
}
Expand Down

0 comments on commit 8f7371c

Please sign in to comment.