Skip to content

Commit

Permalink
GT-3407 Fix UnsupportedOp Exception with JISAutodetect charset. #1358
Browse files Browse the repository at this point in the history
Fixes github issue #1358.


Some character sets don't support the encoding operation.
  • Loading branch information
dev747368 committed Dec 23, 2019
1 parent 4c57727 commit 93bcabe
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 22 deletions.
Expand Up @@ -684,8 +684,8 @@ private String getStringRep(char quoteChar, char quoteCharMulti) {

// if we get the same number of characters out that we put into the decoder,
// then it's a good chance there is a one-to-one correspondence between original char
// and decoded char.
boolean canRecoverOriginalCharBytes =
// offsets and decoded char offsets.
boolean isByteToStringCharEquiv =
stringValue.length() == ((stringBytes.length - aci.byteStartOffset) / charSize);

stringValue = stringLayout.shouldTrimTrailingNulls() ? trimNulls(stringValue) : stringValue;
Expand All @@ -701,22 +701,10 @@ private String getStringRep(char quoteChar, char quoteCharMulti) {
// For each 32bit character in the java string try to add it to the StringRenderBuilder
for (int i = 0, strLength = stringValue.length(); i < strLength;) {
int codePoint = stringValue.codePointAt(i);
byte[] originalCharBytes;
if (canRecoverOriginalCharBytes) {
originalCharBytes = new byte[charSize];
System.arraycopy(stringBytes, i * charSize + aci.byteStartOffset, originalCharBytes,
0, charSize);
}
else {
// can't get original bytes, cheat and run the codePoint through the charset
// to get what should be the same as the original bytes.
String singleCharStr = new String(new int[] { codePoint }, 0, 1);
originalCharBytes = convertStringToBytes(singleCharStr, aci);
}

RENDER_ENUM currentCharRenderSetting = renderSetting;
if (codePoint == StringUtilities.UNICODE_REPLACEMENT && canRecoverOriginalCharBytes &&
isMismatchedCharBytes(originalCharBytes, codePoint)) {
if (codePoint == StringUtilities.UNICODE_REPLACEMENT && isByteToStringCharEquiv &&
!isReplacementCharAt(stringBytes, i * charSize + aci.byteStartOffset)) {
// if this is a true decode error and we can recover the original bytes,
// then force the render mode to byte seq.
currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
Expand Down Expand Up @@ -753,7 +741,8 @@ else if (Character.isISOControl(codePoint) || !Character.isDefined(codePoint) ||
strBuf.addCodePointChar(codePoint);
break;
case BYTE_SEQ:
strBuf.addByteSeq(originalCharBytes);
strBuf.addByteSeq(getOriginalBytes(isByteToStringCharEquiv, i, codePoint,
stringBytes, aci));
break;
case ESC_SEQ:
strBuf.addEscapedCodePoint(codePoint);
Expand All @@ -779,6 +768,26 @@ else if (Character.isISOControl(codePoint) || !Character.isDefined(codePoint) ||
return prefix + strBuf.toString();
}

/**
 * Produces the byte sequence to display for one character of the decoded string.
 * <p>
 * When the decoded string lines up one-to-one with the raw bytes, the bytes are copied
 * straight out of {@code stringBytes}.  Otherwise the code point is re-encoded with the
 * charset named by {@code aci.charsetName} as an approximation of the original bytes.
 *
 * @param isByteToStringCharEquiv true if character offsets map directly onto byte offsets
 * @param charOffset index of the character within the decoded string
 * @param codePoint the decoded code point found at {@code charOffset}
 * @param stringBytes the raw bytes the string was decoded from
 * @param aci supplies the charset name and the starting byte offset
 * @return the bytes for the character, or null if they have to be re-encoded and the
 * charset is either not supported or cannot encode
 */
private byte[] getOriginalBytes(boolean isByteToStringCharEquiv, int charOffset, int codePoint,
		byte[] stringBytes, AdjustedCharsetInfo aci) {

	if (!isByteToStringCharEquiv) {
		// No direct mapping back into stringBytes: round-trip the code point through the
		// charset to get what should match the original bytes.
		String encodeMe = new String(Character.toChars(codePoint));
		if (!Charset.isSupported(aci.charsetName)) {
			return null;
		}
		Charset charset = Charset.forName(aci.charsetName);
		// some charsets are decode-only
		return charset.canEncode() ? encodeMe.getBytes(charset) : null;
	}

	// Direct mapping: slice out the charSize bytes that back this character.
	byte[] sliced = new byte[charSize];
	int srcStart = charOffset * charSize + aci.byteStartOffset;
	System.arraycopy(stringBytes, srcStart, sliced, 0, charSize);
	return sliced;
}

/**
* Trims trailing nulls off the end of the string.
*
Expand Down Expand Up @@ -837,10 +846,13 @@ public String getCharRepresentation() {
StringRenderBuilder.DOUBLE_QUOTE);
}

private boolean isMismatchedCharBytes(byte[] originalCharBytes, int codePoint) {
long originalValue = DataConverter.getInstance(buf.isBigEndian()).getValue(
originalCharBytes, Math.min(charSize, originalCharBytes.length));
return originalValue != codePoint;
/**
 * Checks whether the raw bytes at the given offset already hold the Unicode replacement
 * character value, as opposed to the replacement character having come from elsewhere
 * (presumably a decode failure -- see the caller).
 * <p>
 * The value is read as {@code charSize} bytes through the DataConverter chosen by
 * {@code buf.isBigEndian()}.
 *
 * @param stringBytes the raw bytes of the string
 * @param byteOffset offset in {@code stringBytes} of the character to examine
 * @return true if the value read equals {@code StringUtilities.UNICODE_REPLACEMENT};
 * false if it differs or if fewer than {@code charSize} bytes remain at the offset
 */
private boolean isReplacementCharAt(byte[] stringBytes, int byteOffset) {
if (byteOffset + charSize > stringBytes.length) {
// not enough bytes left to hold a whole character
return false;
}
long origCodePointValue = DataConverter.getInstance(buf.isBigEndian()).getValue(stringBytes,
byteOffset, charSize);
return origCodePointValue == StringUtilities.UNICODE_REPLACEMENT;
}

private static String getTranslatedStringRepresentation(String translatedString) {
Expand Down
Expand Up @@ -112,9 +112,14 @@ public void addCodePointValue(int codePoint) {
* <p>
* {@literal { 0, 1, 2 } -> 00,01,02}
*
* @param bytes
* @param bytes to convert to hex and append. If null, append "???"
*/
public void addByteSeq(byte[] bytes) {
if (bytes == null) {
ensureByteMode();
sb.append("???");
return;
}
for (int i = 0; i < bytes.length; i++) {
ensureByteMode();
String valStr = Integer.toHexString(bytes[i] & 0xff).toUpperCase();
Expand Down

0 comments on commit 93bcabe

Please sign in to comment.