|
5 | 5 | * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/MIT
|
6 | 6 | */
|
7 | 7 |
|
8 |
| -import * as parse5 from 'parse5'; |
9 |
| -import { DocumentFragment } from '@parse5/tools'; |
10 |
| - |
11 |
| -function isSingleStyleNodeContainingSingleTextNode(node: DocumentFragment) { |
12 |
| - if (node.childNodes.length !== 1) { |
13 |
| - return false; |
14 |
| - } |
15 |
| - |
16 |
| - const style = node.childNodes[0]; |
17 |
| - |
18 |
| - if (style.nodeName !== 'style' || style.childNodes.length !== 1) { |
19 |
| - return false; |
20 |
| - } |
21 |
| - |
22 |
| - const textNode = style.childNodes[0]; |
23 |
| - |
24 |
| - return textNode.nodeName === '#text'; |
25 |
| -} |
| 8 | +/** |
| 9 | + * Per the HTML spec on restrictions for "raw text elements" like `<style>`: |
| 10 | + * |
| 11 | + * > The text in raw text and escapable raw text elements must not contain any occurrences of the string |
| 12 | + * > "</" (U+003C LESS-THAN SIGN, U+002F SOLIDUS) followed by characters that case-insensitively match the tag name of |
| 13 | + * > the element followed by one of: |
| 14 | + * > - U+0009 CHARACTER TABULATION (tab) |
| 15 | + * > - U+000A LINE FEED (LF) |
| 16 | + * > - U+000C FORM FEED (FF) |
| 17 | + * > - U+000D CARRIAGE RETURN (CR) |
| 18 | + * > - U+0020 SPACE |
| 19 | + * > - U+003E GREATER-THAN SIGN (>), or |
| 20 | + * > - U+002F SOLIDUS (/) |
| 21 | + * @see https://html.spec.whatwg.org/multipage/syntax.html#cdata-rcdata-restrictions |
| 22 | + */ |
| 23 | +const INVALID_STYLE_CONTENT = /<\/style[\t\n\f\r >/]/i; |
26 | 24 |
|
27 | 25 | /**
|
28 |
| - * The text content inside `<style>` is a special case. It is _only_ rendered by the LWC engine itself; <style> tags |
29 |
| - * are disallowed inside of templates. Also, we want to avoid over-escaping, since CSS containing strings like |
30 |
| - * `&amp;` and `&quot;` is not valid CSS (even when inside a `<style>` element). |
| 26 | + * The text content inside `<style>` is a special case. It is _only_ rendered by the LWC engine itself; `<style>` tags |
| 27 | + * are disallowed inside of HTML templates. |
| 28 | + * |
| 29 | + * The `<style>` tag is unusual in how it's defined in HTML. Like `<script>`, it is considered a "raw text element," |
| 30 | + * which means that it is parsed as raw text, but certain character sequences are disallowed, namely to avoid XSS |
| 31 | + * attacks like `</style><script>alert("pwned")</script>`. |
| 32 | + * |
| 33 | + * This also means that we cannot use "normal" HTML escaping inside `<style>` tags, e.g. we cannot use `&lt;`,
| 34 | + * `&gt;`, etc., because these are treated as-is by the HTML parser.
| 35 | + * |
31 | 36 | *
|
32 |
| - * However, to avoid XSS attacks, we still need to check for things like `</style><script>alert("pwned")</script>`, |
33 |
| - * since a user could use that inside of a *.css file to break out of a <style> element. |
34 | 37 | * @param contents CSS source to validate
|
35 | 38 | * @throws Throws if the contents provided are not valid.
|
| 39 | + * @see https://html.spec.whatwg.org/multipage/syntax.html#raw-text-elements |
36 | 40 | * @see https://github.com/salesforce/lwc/issues/3439
|
37 | 41 | * @example
|
38 | 42 | * validateStyleTextContents('div { color: red }') // Ok
|
39 | 43 | * validateStyleTextContents('</style><script>alert("pwned")</script>') // Throws
|
40 | 44 | */
|
41 | 45 | export function validateStyleTextContents(contents: string): void {
|
42 |
| - // If parse5 parses this as more than one `<style>` tag, then it is unsafe to be rendered as-is |
43 |
| - const fragment = parse5.parseFragment(`<style>${contents}</style>`); |
44 |
| - |
45 |
| - if (!isSingleStyleNodeContainingSingleTextNode(fragment)) { |
| 46 | + if (INVALID_STYLE_CONTENT.test(contents)) { |
46 | 47 | throw new Error(
|
47 | 48 | 'CSS contains unsafe characters and cannot be serialized inside a style element'
|
48 | 49 | );
|
|
0 commit comments