Skip to content

Commit

Permalink
Fix #117
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Oct 16, 2020
1 parent f3e7afa commit d0c237b
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 41 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ SAX2 and Stax2 APIs
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<version>${version.junit}</version>
<scope>test</scope>
</dependency>
</dependencies>
Expand Down
4 changes: 4 additions & 0 deletions release-notes/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Project: woodstox
=== Releases ===
------------------------------------------------------------------------

6.2.3 (16-Oct-2020)

#117: Problem with `Stax2ByteArraySource`, encodings other than UTF-8

6.2.2 (13-Oct-2020)

#112: Max attributes per element limit only loosely enforced
Expand Down
18 changes: 10 additions & 8 deletions src/main/java/com/ctc/wstx/io/MergedStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,22 @@
public final class MergedStream
extends InputStream
{
final ReaderConfig mConfig;
final private ReaderConfig mConfig;

final InputStream mIn;
final private InputStream mIn;

byte[] mData;
private byte[] mData;

int mPtr;
private int mPtr;

final int mEnd;
final private int mEnd;

public MergedStream(ReaderConfig cfg,
InputStream in, byte[] buf, int start, int end)
InputStream in, byte[] buf, int start, int end)
{
if (in == null) {
throw new IllegalArgumentException("InputStream `in` should not be `null`");
}
mConfig = cfg;
mIn = in;
mData = buf;
Expand Down Expand Up @@ -60,8 +63,7 @@ public void mark(int readlimit) {

@Override
public boolean markSupported() {
/* Only supports marks past the initial rewindable section...
*/
// Only supports marks past the initial rewindable section...
return (mData == null) && mIn.markSupported();
}

Expand Down
15 changes: 6 additions & 9 deletions src/main/java/com/ctc/wstx/io/ReaderSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,24 +80,21 @@ public boolean fromInternalEntity() {
public int readInto(WstxInputData reader)
throws IOException, XMLStreamException
{
/* Shouldn't really try to read after closing, but it may be easier
* for caller not to have to keep track of closure...
*/
// Shouldn't really try to read after closing, but it may be easier
// for caller not to have to keep track of closure...
if (mBuffer == null) {
return -1;
}
int count = mReader.read(mBuffer, 0, mBuffer.length);
if (count < 1) {
/* Let's prevent caller from accidentally being able to access
* data, first.
*/
// Let's prevent caller from accidentally being able to access
// data, first.
mInputLast = 0;
reader.mInputPtr = 0;
reader.mInputEnd = 0;
if (count == 0) {
/* Sanity check; should never happen with correctly written
* Readers:
*/
// Sanity check; should never happen with correctly written
// Readers:
throw new WstxException("Reader (of type "+mReader.getClass().getName()+") returned 0 characters, even when asked to read up to "+mBuffer.length, getLocation());
}
return -1;
Expand Down
27 changes: 16 additions & 11 deletions src/main/java/com/ctc/wstx/io/StreamBootstrapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ public static StreamBootstrapper getInstance(String pubId, SystemId sysId, byte[
return new StreamBootstrapper(pubId, sysId, data, start, end);
}

@SuppressWarnings("resource")
@Override
public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion)
throws IOException, XMLStreamException
Expand All @@ -160,9 +161,9 @@ public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion)
if (bufSize < MIN_BUF_SIZE) {
bufSize = MIN_BUF_SIZE;
}
if (mByteBuffer == null) { // non-null if we were passed a buffer
mByteBuffer = cfg.allocFullBBuffer(bufSize);
}
if (mByteBuffer == null) { // non-null if we were passed a buffer
mByteBuffer = cfg.allocFullBBuffer(bufSize);
}

resolveStreamEncoding();

Expand Down Expand Up @@ -233,12 +234,17 @@ public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion)
// Ok; first, do we need to merge stuff back?
InputStream in = mIn;
if (mInputPtr < mInputEnd) {
in = new MergedStream(cfg, in, mByteBuffer, mInputPtr, mInputEnd);
// 16-Oct-2020, tatu: But we may or may not have InputStream to merge
// as per [woodstox-core#117]
if (in == null) {
in = new ByteArrayInputStream(mByteBuffer, mInputPtr, mInputEnd - mInputPtr);
} else {
in = new MergedStream(cfg, in, mByteBuffer, mInputPtr, mInputEnd);
}
}
/* 20-Jan-2006, TSa: Ok; although it is possible to declare
* stream as 'UTF-16', JDK may need help in figuring out
* the right order, so let's be explicit:
*/
// 20-Jan-2006, TSa: Ok; although it is possible to declare
// stream as 'UTF-16', JDK may need help in figuring out
// the right order, so let's be explicit:
if (normEnc == CharsetNames.CS_UTF16) {
mInputEncoding = normEnc = mBigEndian ? CharsetNames.CS_UTF16BE : CharsetNames.CS_UTF16LE;
}
Expand Down Expand Up @@ -474,9 +480,8 @@ protected String verifyXmlEncoding(String enc)
protected boolean ensureLoaded(int minimum)
throws IOException
{
/* Let's assume here buffer has enough room -- this will always
* be true for the limited used this method gets
*/
// Let's assume here buffer has enough room -- this will always
// be true for the limited use this method gets
int gotten = (mInputEnd - mInputPtr);
while (gotten < minimum) {
int count = (mIn == null) ? -1 : mIn.read(mByteBuffer, mInputEnd, mByteBuffer.length - mInputEnd);
Expand Down
38 changes: 26 additions & 12 deletions src/test/java/wstxtest/stream/TestEncodingDetection.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package wstxtest.stream;

import java.io.*;

import javax.xml.stream.*;

import org.codehaus.stax2.XMLInputFactory2;
import org.codehaus.stax2.io.Stax2ByteArraySource;

/**
* This set of unit tests checks that woodstox-specific invariants
* regarding automatic input encoding detection are maintained. Some
Expand All @@ -17,8 +18,7 @@ public class TestEncodingDetection

final static String ENC_EBCDIC_OUT_PREFIX = "IBM";

public void testUtf8()
throws IOException, XMLStreamException
public void testUtf8() throws Exception
{
/* Default is, in absence of any other indications, UTF-8...
* let's check the shortest legal doc:
Expand All @@ -34,8 +34,23 @@ public void testUtf8()
sr.close();
}

public void testUtf16()
throws XMLStreamException
// Regression test for [woodstox-core#117]: a document that declares
// WINDOWS-1252 must report that encoding and remain fully readable.
public void testWindows1252() throws Exception
{
    // Content is pure ASCII, so the charset used to produce the bytes does not matter
    final byte[] encoded = "<?xml version='1.0' encoding='WINDOWS-1252'?><x/>".getBytes("UTF-8");
    final XMLStreamReader reader = getReader(encoded);

    // Reader starts out positioned at the document start
    assertTokenType(START_DOCUMENT, reader.getEventType());

    // Both the declared scheme and the reported encoding must match the declaration
    final String expectedEncoding = "WINDOWS-1252";
    assertEquals(expectedEncoding, reader.getCharacterEncodingScheme());
    assertEquals(expectedEncoding, reader.getEncoding());

    // Walk through the single empty element to make sure decoding keeps working
    assertTokenType(START_ELEMENT, reader.next());
    assertTokenType(END_ELEMENT, reader.next());
    reader.close();
}

public void testUtf16() throws Exception
{
// Should be able to figure out encoding...
String XML = ".<?xml version='1.0'?><root/>";
Expand Down Expand Up @@ -73,8 +88,7 @@ public void testUtf16()
* But let's try a straight-forward (naive?) test
* to verify that what is supposed to work does.
*/
public void testEBCDIC()
throws IOException, XMLStreamException
public void testEBCDIC() throws Exception
{
final String[] subtypes = new String[] {
"037", "277", "278", "280", "284", "285", "297",
Expand Down Expand Up @@ -130,10 +144,10 @@ private byte[] getUtf16Bytes(String input, boolean bigEndian)
return b;
}

private XMLStreamReader getReader(byte[] b)
throws XMLStreamException
private XMLStreamReader getReader(byte[] b) throws Exception
{
XMLInputFactory f = getInputFactory();
return f.createXMLStreamReader(new ByteArrayInputStream(b));
XMLInputFactory2 f = getInputFactory();
Stax2ByteArraySource src = new Stax2ByteArraySource(b, 0, b.length);
return f.createXMLStreamReader(src);
}
}

0 comments on commit d0c237b

Please sign in to comment.