Skip to content

Commit

Permalink
Fix #142 (multiple text events for long segments if requested) (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
johannesherr committed Apr 30, 2022
1 parent e594e42 commit 22b17c1
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 29 deletions.
12 changes: 12 additions & 0 deletions src/main/java/com/ctc/wstx/api/ReaderConfig.java
Expand Up @@ -818,6 +818,18 @@ public boolean hasInternNsURIsBeenEnabled() {
return _hasExplicitConfigFlag(CFG_INTERN_NS_URIS);
}

/**
 * Tells whether text coalescing is off because the caller turned it off,
 * as opposed to being off merely because it was never configured and the
 * default applies (in which case this method answers {@code false}).
 *
 * @return {@code true} only when the coalescing flag is currently cleared
 *   and is also recorded among the explicitly modified flags;
 *   {@code false} otherwise
 */
public boolean isCoalescingExplicitlyDisabled() {
    // Coalescing currently enabled: it has not been disabled, explicitly or otherwise
    if (_hasConfigFlag(CFG_COALESCE_TEXT)) {
        return false;
    }
    // Disabled -- report whether the flag was touched explicitly by the user
    return (mConfigFlagMods & CFG_COALESCE_TEXT) != 0;
}

/*
///////////////////////////////////////////////////////////////////////
// Simple mutators
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/ctc/wstx/sr/BasicStreamReader.java
Expand Up @@ -434,10 +434,10 @@ protected BasicStreamReader(InputBootstrapper bs,
mShortestTextSegment = Integer.MAX_VALUE;
} else {
mStTextThreshold = TOKEN_PARTIAL_SINGLE;
if (forER) {
if (forER && !cfg.isCoalescingExplicitlyDisabled()) {
/* 30-Sep-2005, TSa: No point in returning runt segments for event readers
* (due to event object overhead, less convenient); let's just force
* returning of full length segments.
* returning of full length segments. (Unless explicitly requested.)
*/
mShortestTextSegment = Integer.MAX_VALUE;
} else {
Expand Down
60 changes: 33 additions & 27 deletions src/test/java/wstxtest/evt/TestEventReader.java
Expand Up @@ -24,11 +24,11 @@
* but it creates class of non-checked exceptions used to wrap real
* stream exceptions)
* </li>
* <li>Event readers always read the full text segment, instead of returning
* fragments (i.e. min. segment length will be replaced with
* Integer.MAX_VALUE). This is done for more convenient access, and because
* the overhead of multiple Event objects may outweigh potential benefits
* from returning shorter segments.
* <li>Unless coalescing is explicitly set to false, event readers always read
* the full text segment, instead of returning fragments (i.e. min. segment
* length will be replaced with Integer.MAX_VALUE). This is done for more
* convenient access, and because the overhead of multiple Event objects may
* outweigh potential benefits from returning shorter segments.
* </li>
*</ul>
*/
Expand Down Expand Up @@ -87,25 +87,20 @@ public void testEventReaderLongSegments()
+" not sure If we\r\nreally need anything much more but"
+" let's still make this longer"
+"</root>";
;

// Need to disable coalescing though for test to work:
XMLEventReader er = getReader(XML, false);
XMLEvent evt = er.nextEvent(); // start document
assertTrue(evt.isStartDocument());
assertTrue(er.nextEvent().isStartElement());
assertTrue(er.nextEvent().isCharacters());

evt = er.nextEvent();
if (evt.isEndElement()) {
; // good
} else {
if (evt.isCharacters()) {
fail("Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 adjacent separate Characters events.");
} else { // hmmh. strange
fail("Unexpected event object type after CHARACTERS: "+evt.getClass());
}
}

// Single text event expected (default value, explicit coalescing=true):

String message = "Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 separate Characters events.";
// the default behaviour for event readers is to not break text segments into multiple events
assertEquals(message, 1, numTextEvents(getReader(XML, null)));
// if coalescing is set to true event readers do not break text segments into multiple events
assertEquals(message, 1, numTextEvents(getReader(XML, true)));

// Multiple text events expected (explicit coalescing=false):

// if coalescing is explicitly set to false, multiple text events may be returned for a text segment
String messageMultiple = "If coalescing is set to false, multiple text events are expected for this input xml.";
assertTrue(messageMultiple, numTextEvents(getReader(XML, false)) > 1);
}

/**
Expand Down Expand Up @@ -150,17 +145,28 @@ public void testDtdNotations()
// Internal methods
//////////////////////////////////////////////////////
*/

private XMLEventReader2 getReader(String contents, boolean coalescing)
private XMLEventReader2 getReader(String contents, Boolean coalescing)
throws XMLStreamException
{
XMLInputFactory f = getInputFactory();
setNamespaceAware(f, true);
setCoalescing(f, coalescing);
if (coalescing != null) {
setCoalescing(f, coalescing);
}
setLazyParsing(f, true); // shouldn't have effect for event readers!
setMinTextSegment(f, 8); // likewise
return constructEventReader(f, contents);
}

/**
 * Drains the given event reader and tallies the character events it yields.
 *
 * @param er event reader to consume; it is read until {@code hasNext()}
 *   reports that no further events are available
 * @return number of events for which {@code isCharacters()} returned true
 * @throws XMLStreamException if fetching the next event fails
 */
private int numTextEvents(XMLEventReader er) throws XMLStreamException {
    int textCount = 0;
    for (; er.hasNext(); ) {
        // every event is consumed; only character events add to the tally
        textCount += er.nextEvent().isCharacters() ? 1 : 0;
    }
    return textCount;
}
}


0 comments on commit 22b17c1

Please sign in to comment.