Skip to content

Commit

Permalink
fix oom exception
Browse files: browse the repository at this point in the history
  • Loading branch information
rbri committed Apr 14, 2022
1 parent 6c00fb5 commit 9d2aecd
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 1 deletion.
Expand Up @@ -2626,7 +2626,7 @@ protected void scanPI() throws IOException {
if (c == '?' || c == '/') {
final char c0 = (char)c;
c = fCurrentEntity.read();
if (c == '>') {
if (c == -1 || c == '>') {
break;
}
fStringBuffer.append(c0);
Expand Down
Expand Up @@ -253,4 +253,22 @@ public void elementNameNormalization() throws Exception {
final String[] expectedStringLower = {"(html", "(head", "(title", ")title", ")head", "(body", ")body", ")html"};
assertEquals(Arrays.asList(expectedStringLower).toString(), filter.collectedStrings.toString());
}

/**
 * Regression test for an oom exception in versions &lt; 2.60.
 * <p>
 * The document consists of a broken comment start followed by a processing
 * instruction that stops right after a {@code '/'}, so the input ends while
 * the processing instruction is still open. The parse has to finish and the
 * filter must have collected the skeleton elements asserted below.
 *
 * @throws Exception if parsing fails or the collected output differs
 */
@Test
public void invalidProcessingInstruction() throws Exception {
    final HTMLConfiguration configuration = new HTMLConfiguration();
    final EvaluateInputSourceFilter collector = new EvaluateInputSourceFilter(configuration);
    final XMLDocumentFilter[] filterChain = {collector};
    configuration.setProperty("http://cyberneko.org/html/properties/filters", filterChain);

    // Input ends in the middle of the processing instruction.
    final StringReader brokenDocument = new StringReader("<!--?><?a/");
    configuration.parse(new XMLInputSource(null, "myTest", null, brokenDocument, "UTF-8"));

    final String expectedEvents =
            Arrays.asList("(HTML", "(head", ")head", "(body", ")body", ")html").toString();
    assertEquals(expectedEvents, collector.collectedStrings.toString());
}
}
1 change: 1 addition & 0 deletions src/test/resources/error-handling/test-broken-pi.html
@@ -0,0 +1 @@
<!--?><?a/
14 changes: 14 additions & 0 deletions src/test/resources/error-handling/test-broken-pi.html.canonical
@@ -0,0 +1,14 @@
[Warn] HTML1000 No character encoding indicator at beginning of document.
[Err] HTML1007 Premature end of file encountered.
#?
[Warn] HTML1008 Skipping processing instruction.
?a
[Err] HTML2000 Empty document.
[Warn] HTML2006 Bare character content found. Inserting parent element <body>.
[Warn] HTML2002 Missing parent chain. Inserting proper parent <HTML> for element <head>.
(HTML
(head
)head
(body
)body
)html
@@ -0,0 +1,2 @@
property http://cyberneko.org/html/properties/default-encoding ASCII
feature http://cyberneko.org/html/features/report-errors true

0 comments on commit 9d2aecd

Please sign in to comment.