Skip to content

Commit

Permalink
Rework handling of raw-text elements to avoid browser confusion
Browse files Browse the repository at this point in the history
  • Loading branch information
mikesamuel committed Nov 2, 2011
1 parent 95dee33 commit 2027d3d
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CLASSPATH=lib/guava-libraries/guava.jar:lib/jsr305/jsr305.jar
TEST_CLASSPATH=$(CLASSPATH):lib/htmlparser-1.3/htmlparser-1.3.jar:lib/junit/junit.jar:lib/commons-codec-1.4/commons-codec-1.4.jar:benchmark-data
JAVAC_FLAGS=-source 1.5 -target 1.5 -Xlint
JAVAC_FLAGS=-source 1.5 -target 1.5 -Xlint -encoding UTF-8


default: javadoc runtests findbugs
Expand Down
28 changes: 25 additions & 3 deletions src/main/org/owasp/html/HtmlStreamRenderer.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public final void openTag(String elementName, List<String> attrs) {
private void writeOpenTag(String elementName, List<? extends String> attrs)
throws IOException {
if (!open) { throw new IllegalStateException(); }
elementName = HtmlLexer.canonicalName(elementName);
elementName = safeName(elementName);
if (!isValidHtmlName(elementName)) {
error("Invalid element name", elementName);
return;
Expand All @@ -160,8 +160,8 @@ private void writeOpenTag(String elementName, List<? extends String> attrs)
}

switch (HtmlTextEscapingMode.getModeForTag(elementName)) {
case CDATA_SOMETIMES:
case CDATA:
case CDATA_SOMETIMES:
case PLAIN_TEXT:
lastTagOpened = elementName;
pendingUnescaped = new StringBuilder();
Expand Down Expand Up @@ -206,7 +206,7 @@ private void writeOpenTag(String elementName, List<? extends String> attrs)

public final void closeTag(String elementName) {
try {
writeCloseTag(elementName);
writeCloseTag(safeName(elementName));
} catch (IOException ex) {
ioExHandler.handle(ex);
}
Expand Down Expand Up @@ -446,6 +446,28 @@ static void appendNumericEntity(int codepoint, Appendable output)
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};

/**
* Canonicalizes the element name and possibly substitutes an alternative
* that has more consistent semantics.
*/
static String safeName(String elementName) {
elementName = HtmlLexer.canonicalName(elementName);

// Substitute a reliably non-raw-text element for raw-text and
// plain-text elements.
switch (elementName.length()) {
case 3:
if ("xmp".equals(elementName)) { return "pre"; }
break;
case 7:
if ("listing".equals(elementName)) { return "pre"; }
break;
case 9:
if ("plaintext".equals(elementName)) { return "pre"; }
break;
}
return elementName;
}

static class CloseableHtmlStreamRenderer extends HtmlStreamRenderer
implements Closeable {
Expand Down
19 changes: 10 additions & 9 deletions src/main/org/owasp/html/HtmlTextEscapingMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,19 @@ enum HtmlTextEscapingMode {
private static final ImmutableMap<String, HtmlTextEscapingMode> ESCAPING_MODES
= ImmutableMap.<String, HtmlTextEscapingMode>builder()
.put("iframe", CDATA)
// HTML5 does not treat listing as CDATA, but HTML2 does
// at http://www.w3.org/MarkUp/1995-archive/NonStandard.html
// HTML5 does not treat listing as CDATA and treats XMP as deprecated,
// but HTML2 does at
// http://www.w3.org/MarkUp/1995-archive/NonStandard.html
// Listing is not supported by browsers.
.put("listing", CDATA_SOMETIMES)
.put("xmp", CDATA)

// Technically, only if embeds, frames, and scripts, respectively, are
// enabled.
.put("noembed", CDATA_SOMETIMES)
.put("noframes", CDATA_SOMETIMES)
.put("noscript", CDATA_SOMETIMES)
// Technically, noembed, noscript and noframes are CDATA_SOMETIMES but
// we can only be hurt by allowing tag content that looks like text so
// we treat them as regular..
//.put("noembed", CDATA_SOMETIMES)
//.put("noframes", CDATA_SOMETIMES)
//.put("noscript", CDATA_SOMETIMES)
.put("comment", CDATA_SOMETIMES) // IE only

// Runs till end of file.
Expand All @@ -107,8 +110,6 @@ enum HtmlTextEscapingMode {
.put("textarea", RCDATA)
.put("title", RCDATA)

.put("xmp", CDATA)

// Nodes that can't contain content.
// http://www.w3.org/TR/html-markup/syntax.html#void-elements
.put("area", VOID)
Expand Down
4 changes: 2 additions & 2 deletions src/tests/org/owasp/html/HtmlPolicyBuilderFuzzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class HtmlPolicyBuilderFuzzerTest extends FuzzyTestCase {

final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> policyFactory
= new HtmlPolicyBuilder()
.allowElements("a", "b", "xmp")
.allowElements("a", "b", "xmp", "pre")
.allowAttributes("href").onElements("a")
.allowAttributes("title").globally()
.allowStandardUrlProtocols()
Expand Down Expand Up @@ -129,7 +129,7 @@ private static void checkSafe(Node node, String html) {
switch (node.getNodeType()) {
case Node.ELEMENT_NODE:
String name = node.getNodeName();
if (!"a".equals(name) && !"b".equals(name) && !"xmp".equals(name)) {
if (!"a".equals(name) && !"b".equals(name) && !"pre".equals(name)) {
fail("Illegal element name " + name + " : " + html);
}
NamedNodeMap attrs = node.getAttributes();
Expand Down
21 changes: 20 additions & 1 deletion src/tests/org/owasp/html/HtmlSanitizerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,25 @@ public final void testInnerHTMLIE8() throws Exception {
sanitize("<div title=\"``onmouseover=alert(1337)\">"));
}

public final void testNabobsOfNegativism() throws Exception {
// Treating <noscript> as raw-text gains us nothing security-wise.
assertEquals("<noscript></noscript>",
sanitize("<noscript><evil></noscript>"));
assertEquals("<noscript>I <b>&lt;3</b> Ponies</noscript>",
sanitize("<noscript>I <b><3</b> Ponies</noscript>"));
assertEquals("<noscript>I <b>&lt;3</b> Ponies</noscript>",
sanitize("<NOSCRIPT>I <b><3</b> Ponies</noscript><evil>"));
assertEquals("<noframes>I <b>&lt;3</b> Ponies</noframes>",
sanitize("<noframes>I <b><3</b> Ponies</noframes><evil>"));
assertEquals("<noembed>I <b>&lt;3</b> Ponies</noembed>",
sanitize("<noembed>I <b><3</b> Ponies</noembed><evil>"));
assertEquals("<noxss>I <b>&lt;3</b> Ponies</noxss>",
sanitize("<noxss>I <b><3</b> Ponies</noxss><evil>"));
assertEquals(
"&lt;noscript&gt;I &lt;b&gt;&lt;3&lt;/b&gt; Ponies&lt;/noscript&gt;",
sanitize("<xmp><noscript>I <b><3</b> Ponies</noscript></xmp>"));
}

private static String sanitize(@Nullable String html) throws Exception {
StringBuilder sb = new StringBuilder();
HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
Expand All @@ -426,7 +445,7 @@ public void handle(String errorMessage) {
// Allow these tags.
.allowElements(
"a", "b", "br", "div", "i", "img", "input", "li",
"ol", "p", "span", "ul")
"ol", "p", "span", "ul", "noscript", "noframes", "noembed", "noxss")
// And these attributes.
.allowAttributes(
"dir", "checked", "class", "href", "id", "target", "title", "type")
Expand Down
46 changes: 46 additions & 0 deletions src/tests/org/owasp/html/HtmlStreamRendererTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,52 @@ public final void testAppendNumericEntity() throws Exception {
sb.toString());
}

// Test that policies that naively allow <xmp>, <listing>, or <plaintext>
// on XHTML don't shoot themselves in the foot.

public final void testPreSubstitutes1() throws Exception {
renderer.openDocument();
renderer.openTag("Xmp", ImmutableList.<String>of());
renderer.text("<form>Hello, World</form>");
renderer.closeTag("Xmp");
renderer.closeDocument();

assertEquals("<pre>&lt;form&gt;Hello, World&lt;/form&gt;</pre>",
rendered.toString());
}

public final void testPreSubstitutes2() throws Exception {
renderer.openDocument();
renderer.openTag("xmp", ImmutableList.<String>of());
renderer.text("<form>Hello, World</form>");
renderer.closeTag("xmp");
renderer.closeDocument();

assertEquals("<pre>&lt;form&gt;Hello, World&lt;/form&gt;</pre>",
rendered.toString());
}

public final void testPreSubstitutes3() throws Exception {
renderer.openDocument();
renderer.openTag("LISTING", ImmutableList.<String>of());
renderer.text("<form>Hello, World</form>");
renderer.closeTag("LISTING");
renderer.closeDocument();

assertEquals("<pre>&lt;form&gt;Hello, World&lt;/form&gt;</pre>",
rendered.toString());
}

public final void testPreSubstitutes4() throws Exception {
renderer.openDocument();
renderer.openTag("plaintext", ImmutableList.<String>of());
renderer.text("<form>Hello, World</form>");
renderer.closeDocument();

assertEquals("<pre>&lt;form&gt;Hello, World&lt;/form&gt;",
rendered.toString());
}

private void assertNormalized(String golden, String htmlInput)
throws Exception {
assertEquals(golden, normalize(htmlInput));
Expand Down

0 comments on commit 2027d3d

Please sign in to comment.