diff --git a/build.gradle b/build.gradle index e354ab4c..ff981c01 100644 --- a/build.gradle +++ b/build.gradle @@ -100,6 +100,9 @@ task generateJNI(type: Exec) { executable = "../generate_tiledb_jni" } +compileJava.options.encoding = 'UTF-8' +compileTestJava.options.encoding = 'UTF-8' + import org.apache.tools.ant.taskdefs.condition.Os test { useJUnit() diff --git a/src/main/java/io/tiledb/java/api/NativeArray.java b/src/main/java/io/tiledb/java/api/NativeArray.java index 3c2f99b6..6d5ec9d2 100644 --- a/src/main/java/io/tiledb/java/api/NativeArray.java +++ b/src/main/java/io/tiledb/java/api/NativeArray.java @@ -33,6 +33,7 @@ package io.tiledb.java.api; import io.tiledb.libtiledb.*; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; public class NativeArray implements AutoCloseable { @@ -196,10 +197,16 @@ private int getSize(Object buffer) throws TileDBError { { return ((long[]) buffer).length; } - case TILEDB_STRING_ASCII: case TILEDB_CHAR: + case TILEDB_STRING_ASCII: { - return stringToBytes(buffer).length; + Charset charset = StandardCharsets.ISO_8859_1; + return stringToBytes((String) buffer, charset).length; + } + case TILEDB_STRING_UTF8: + { + Charset charset = StandardCharsets.UTF_8; + return stringToBytes((String) buffer, charset).length; } case TILEDB_DATETIME_YEAR: case TILEDB_DATETIME_MONTH: @@ -276,11 +283,17 @@ private void createNativeArrayFromBuffer(Object buffer) throws TileDBError { uint64_tArray = Utils.newInt64_tArray((long[]) buffer); break; } - case TILEDB_STRING_ASCII: case TILEDB_CHAR: + case TILEDB_STRING_ASCII: + { + Charset charset = StandardCharsets.ISO_8859_1; + int8_tArray = Utils.newInt8_tArray(stringToBytes((String) buffer, charset)); + break; + } + case TILEDB_STRING_UTF8: { - byte[] bytes = stringToBytes(buffer); - int8_tArray = Utils.newInt8_tArray(bytes); + Charset charset = StandardCharsets.UTF_8; + int8_tArray = Utils.newInt8_tArray(stringToBytes((String) buffer, charset)); break; } case TILEDB_DATETIME_YEAR: @@ -531,10 +544,20 @@ public void setItem(int index, Object value) throws ArrayIndexOutOfBoundsExcepti uint64_tArray.setitem(index, (long) value); break; } - case TILEDB_STRING_ASCII: case TILEDB_CHAR: + case TILEDB_STRING_ASCII: { - for (byte b : stringToBytes(value)) { + Charset charset = StandardCharsets.ISO_8859_1; + for (byte b : stringToBytes((String) value, charset)) { + int8_tArray.setitem(index, b); + index++; + } + break; + } + case TILEDB_STRING_UTF8: + { + Charset charset = StandardCharsets.UTF_8; + for (byte b : stringToBytes((String) value, charset)) { int8_tArray.setitem(index, b); index++; } @@ -607,6 +630,7 @@ public SWIGTYPE_p_void toVoidPointer() throws TileDBError { return PointerUtils.toVoid(uint64_tArray); } case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: case TILEDB_CHAR: { return PointerUtils.toVoid(int8_tArray); @@ -778,6 +802,7 @@ public Object toJavaArray(int position, int elements) throws TileDBError { return Utils.int64ArrayGet(uint64_tArray, position, elements); } case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: case TILEDB_CHAR: { return Utils.int8ArrayGet(int8_tArray, position, elements); @@ -872,6 +897,8 @@ private void createNativeArrayFromVoidPointer(SWIGTYPE_p_p_void pointer) throws int64_tArray = PointerUtils.int64_tArrayFromVoid(pointer); break; } + case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: case TILEDB_CHAR: { int8_tArray = PointerUtils.int8_tArrayFromVoid(pointer); @@ -959,8 +986,8 @@ private void createNativeArrayFromVoidPointer(SWIGTYPE_p_void pointer) throws Ti } } - private byte[] stringToBytes(Object buffer) { - return ((String) buffer).getBytes(StandardCharsets.UTF_8); + private byte[] stringToBytes(String buffer, Charset charset) { + return buffer.getBytes(charset); } protected Datatype getNativeType() { diff --git a/src/main/java/io/tiledb/java/api/Types.java b/src/main/java/io/tiledb/java/api/Types.java index 98b2a15c..ac18e1d0 100644 --- a/src/main/java/io/tiledb/java/api/Types.java +++ b/src/main/java/io/tiledb/java/api/Types.java @@ -111,6 +111,7 @@ public static Class getJavaType(Datatype type) throws TileDBError { { return Long.class; } + case TILEDB_STRING_UTF8: case TILEDB_STRING_ASCII: case TILEDB_CHAR: { diff --git a/src/test/java/io/tiledb/java/api/ArrayTest.java b/src/test/java/io/tiledb/java/api/ArrayTest.java index 765f3da6..66235b56 100644 --- a/src/test/java/io/tiledb/java/api/ArrayTest.java +++ b/src/test/java/io/tiledb/java/api/ArrayTest.java @@ -425,6 +425,8 @@ public void testArrayMetadata() throws Exception { Double.class); NativeArray metadataString = new NativeArray(ctx, "русский", String.class); + NativeArray metadataStringAscii = new NativeArray(ctx, "Russia", TILEDB_STRING_ASCII); + NativeArray metadataStringUtf8 = new NativeArray(ctx, "русский", TILEDB_STRING_UTF8); String byteKey = "md-byte"; String shortKey = "md-short"; @@ -432,17 +434,36 @@ public void testArrayMetadata() throws Exception { String floatKey = "md-float"; String doubleKey = "md-double"; String stringKey = "md-string"; + String stringAsciiKey = "md-string-ascii"; + String stringUtf8Key = "md-string-utf8"; // metadata keys sorted in a lexicographic ordering - String[] keys = new String[] {byteKey, doubleKey, floatKey, intKey, shortKey, stringKey}; + String[] keys = + new String[] { + byteKey, doubleKey, floatKey, intKey, shortKey, stringKey, stringAsciiKey, stringUtf8Key + }; Datatype[] types = new Datatype[] { - TILEDB_INT8, TILEDB_FLOAT64, TILEDB_FLOAT32, TILEDB_INT32, TILEDB_INT16, TILEDB_CHAR + TILEDB_INT8, + TILEDB_FLOAT64, + TILEDB_FLOAT32, + TILEDB_INT32, + TILEDB_INT16, + TILEDB_CHAR, + TILEDB_STRING_ASCII, + TILEDB_STRING_UTF8 }; int keysNum = keys.length; NativeArray[] nativeArrays = new NativeArray[] { - metadataByte, metadataDouble, metadataFloat, metadataInt, metadataShort, metadataString + metadataByte, + metadataDouble, + metadataFloat, + metadataInt, + metadataShort, + metadataString, + metadataStringAscii, + metadataStringUtf8 }; Object[] expectedArrays = new Object[] { @@ -451,7 +472,9 @@ public void testArrayMetadata() throws Exception { metadataFloat.toJavaArray(), metadataInt.toJavaArray(), metadataShort.toJavaArray(), - metadataString.toJavaArray() + metadataString.toJavaArray(), + metadataStringAscii.toJavaArray(), + metadataStringUtf8.toJavaArray() }; for (int i = 0; i < keysNum; i++) { @@ -483,6 +506,8 @@ public void testArrayMetadata() throws Exception { NativeArray metadataFloatActual = arrayn.getMetadata(floatKey, TILEDB_FLOAT32); NativeArray metadataDoubleActual = arrayn.getMetadata(doubleKey, TILEDB_FLOAT64); NativeArray metadataStringActual = arrayn.getMetadata(stringKey, TILEDB_CHAR); + NativeArray metadataStringAsciiActual = arrayn.getMetadata(stringAsciiKey, TILEDB_STRING_ASCII); + NativeArray metadataStringUtf8Actual = arrayn.getMetadata(stringUtf8Key, TILEDB_STRING_UTF8); Assert.assertNotNull(metadataByteActual); Assert.assertNotNull(metadataShortActual); @@ -490,6 +515,8 @@ public void testArrayMetadata() throws Exception { Assert.assertNotNull(metadataFloatActual); Assert.assertNotNull(metadataDoubleActual); Assert.assertNotNull(metadataStringActual); + Assert.assertNotNull(metadataStringAsciiActual); + Assert.assertNotNull(metadataStringUtf8Actual); Assert.assertArrayEquals( (byte[]) metadataByte.toJavaArray(), (byte[]) metadataByteActual.toJavaArray()); @@ -506,7 +533,17 @@ public void testArrayMetadata() throws Exception { Assert.assertArrayEquals( (byte[]) metadataString.toJavaArray(), (byte[]) metadataStringActual.toJavaArray()); Assert.assertEquals( - "русский", new String((byte[]) metadataString.toJavaArray(), StandardCharsets.UTF_8)); + "???????", new String((byte[]) metadataString.toJavaArray(), StandardCharsets.ISO_8859_1)); + Assert.assertArrayEquals( + (byte[]) metadataStringAscii.toJavaArray(), + (byte[]) metadataStringAsciiActual.toJavaArray()); + Assert.assertEquals( + "Russia", + new String((byte[]) metadataStringAscii.toJavaArray(), StandardCharsets.ISO_8859_1)); + Assert.assertArrayEquals( + (byte[]) metadataStringUtf8.toJavaArray(), (byte[]) metadataStringUtf8Actual.toJavaArray()); + Assert.assertEquals( + "русский", new String((byte[]) metadataStringUtf8.toJavaArray(), StandardCharsets.UTF_8)); // exctracion of metadata without specifying the Datatype for (int i = 0; i < keysNum; i++) {