From 78541dcc2e0f613825adcad2ff2c547d75f12460 Mon Sep 17 00:00:00 2001 From: gsvic Date: Thu, 7 May 2020 16:58:19 +0300 Subject: [PATCH] Add support for non empty domain var by name [ch1877] --- src/main/java/io/tiledb/java/api/Array.java | 148 +++++++++++++++--- .../java/io/tiledb/java/api/Dimension.java | 14 +- src/main/java/io/tiledb/java/api/Util.java | 32 ++++ .../java/io/tiledb/java/api/ArrayTest.java | 107 ++++++++++++- .../io/tiledb/java/api/DimensionTest.java | 12 ++ .../java/io/tiledb/java/api/QueryTest.java | 36 +---- 6 files changed, 288 insertions(+), 61 deletions(-) create mode 100644 src/main/java/io/tiledb/java/api/Util.java diff --git a/src/main/java/io/tiledb/java/api/Array.java b/src/main/java/io/tiledb/java/api/Array.java index 8afe272a..a4c5c9f7 100644 --- a/src/main/java/io/tiledb/java/api/Array.java +++ b/src/main/java/io/tiledb/java/api/Array.java @@ -456,34 +456,24 @@ public static void create( */ public HashMap nonEmptyDomain() throws TileDBError { checkIsOpen(); - HashMap ret = new HashMap(); - try (Domain domain = schema.getDomain(); - NativeArray domainArray = - new NativeArray(ctx, 2 * (int) domain.getRank(), domain.getType())) { - SWIGTYPE_p_int emptyp = tiledb.new_intp(); - try { - ctx.handleError( - tiledb.tiledb_array_get_non_empty_domain( - ctx.getCtxp(), arrayp, domainArray.toVoidPointer(), emptyp)); - if (tiledb.intp_value(emptyp) == 1) { - return ret; - } - } finally { - tiledb.delete_intp(emptyp); - } - for (int i = 0; i < domain.getRank(); i++) { - try (Dimension d = domain.getDimension(i)) { - ret.put( - d.getName(), - new Pair(domainArray.getItem((2 * i) + 0), domainArray.getItem((2 * i) + 1))); - } + HashMap ret = new HashMap<>(); + try { + Domain domain = schema.getDomain(); + long numDims = domain.getNDim(); + for (long dimIdx = 0; dimIdx < numDims; ++dimIdx) { + Dimension dimension = domain.getDimension(dimIdx); + Pair p = getNonEmptyDomainFromIndex(dimIdx); + ret.put(dimension.getName(), p); } + } catch (TileDBError 
error) { + throw error; } return ret; } /** - * Given a dimension's index, return the bounding coordinates for that dimension. + * Given a dimension's index, return the bounding coordinates for that dimension. The method + * checks if the dimension is var-sized or not, and it works for both cases. * * @param index THe dimension's index * @return A Pair that contains the dimension's bounds @@ -491,11 +481,12 @@ public HashMap nonEmptyDomain() throws TileDBError { */ public Pair getNonEmptyDomainFromIndex(long index) throws TileDBError { checkIsOpen(); - Pair p; try (Domain domain = schema.getDomain(); NativeArray domainArray = new NativeArray(ctx, 2 * (int) domain.getRank(), domain.getType())) { + if (domain.getDimension(index).isVar()) return getNonEmptyDomainVarFromIndex(index); + SWIGTYPE_p_int emptyp = tiledb.new_intp(); try { ctx.handleError( @@ -514,7 +505,8 @@ public Pair getNonEmptyDomainFromIndex(long index) throws TileDBError { } /** - * Given a dimension's name, return the bounding coordinates for that dimension. + * Given a dimension's name, return the bounding coordinates for that dimension. The method checks + * if the dimension is var-sized or not, and it works for both cases. * * @param name THe dimension's name * @return A Pair that contains the dimension's bounds @@ -525,6 +517,9 @@ public Pair getNonEmptyDomainFromName(String name) throws TileDBError { try (Domain domain = schema.getDomain(); NativeArray domainArray = new NativeArray(ctx, 2 * (int) domain.getRank(), domain.getType())) { + + if (domain.getDimension(name).isVar()) return this.getNonEmptyDomainVarFromName(name); + SWIGTYPE_p_int emptyp = tiledb.new_intp(); try { ctx.handleError( @@ -540,6 +535,111 @@ public Pair getNonEmptyDomainFromName(String name) throws TileDBError { } } + /** + * Retrieves the non-empty domain range sizes from an array for a given dimension index. This is + * the union of the non-empty domains of the array fragments on the given dimension. 
Applicable + * only to var-sized dimensions. + * + * @param index The dimension index + * @return A Pair holding the size of the range start and the size of the range end + * @throws TileDBError A TileDB exception + */ + public Pair getNonEmptyDomainVarSizeFromIndex(long index) + throws TileDBError { + SWIGTYPE_p_int emptyp = tiledb.new_intp(); + SWIGTYPE_p_unsigned_long_long startSize = tiledb.new_ullp(); + SWIGTYPE_p_unsigned_long_long endSize = tiledb.new_ullp(); + + ctx.handleError( + tiledb.tiledb_array_get_non_empty_domain_var_size_from_index( + ctx.getCtxp(), arrayp, index, startSize, endSize, emptyp)); + + return new Pair(tiledb.ullp_value(startSize), tiledb.ullp_value(endSize)); + } + + /** + * Retrieves the non-empty domain range sizes from an array for a given dimension name. This is + * the union of the non-empty domains of the array fragments on the given dimension. Applicable + * only to var-sized dimensions. + * + * @param name The dimension name + * @return A Pair holding the size of the range start and the size of the range end + * @throws TileDBError A TileDB exception + */ + public Pair getNonEmptyDomainVarSizeFromName(String name) + throws TileDBError { + SWIGTYPE_p_int emptyp = tiledb.new_intp(); + SWIGTYPE_p_unsigned_long_long startSize = tiledb.new_ullp(); + SWIGTYPE_p_unsigned_long_long endSize = tiledb.new_ullp(); + + ctx.handleError( + tiledb.tiledb_array_get_non_empty_domain_var_size_from_name( + ctx.getCtxp(), arrayp, name, startSize, endSize, emptyp)); + + return new Pair(tiledb.ullp_value(startSize), tiledb.ullp_value(endSize)); + } + + /** + * Retrieves the non-empty domain from an array for a given dimension index. This is the union of + * the non-empty domains of the array fragments on the given dimension. Applicable only to + * var-sized dimensions. 
+ * + * @param index The dimension index + * @return A Pair of Strings holding the start and the end of the non-empty domain + * @throws TileDBError A TileDB exception + */ + public Pair getNonEmptyDomainVarFromIndex(long index) throws TileDBError { + SWIGTYPE_p_int emptyp = tiledb.new_intp(); + + Dimension dim = this.schema.getDomain().getDimension(index); + Pair size = this.getNonEmptyDomainVarSizeFromIndex(index); + + Datatype dimType = dim.getType(); + int startSize = size.getFirst().intValue(); + int endSize = size.getSecond().intValue(); + + NativeArray start = new NativeArray(ctx, startSize, dimType); + NativeArray end = new NativeArray(ctx, endSize, dimType); + + ctx.handleError( + tiledb.tiledb_array_get_non_empty_domain_var_from_index( + ctx.getCtxp(), arrayp, index, start.toVoidPointer(), end.toVoidPointer(), emptyp)); + + return new Pair( + new String((byte[]) start.toJavaArray()), new String((byte[]) end.toJavaArray())); + } + + /** + * Retrieves the non-empty domain from an array for a given dimension name. This is the union of + * the non-empty domains of the array fragments on the given dimension. Applicable only to + * var-sized dimensions. 
+ * + * @param name The dimension name + * @return A Pair of Strings holding the start and the end of the non-empty domain + * @throws TileDBError A TileDB exception + */ + public Pair getNonEmptyDomainVarFromName(String name) throws TileDBError { + SWIGTYPE_p_int emptyp = tiledb.new_intp(); + + Dimension dim = this.schema.getDomain().getDimension(name); + + Pair size = this.getNonEmptyDomainVarSizeFromName(name); + + Datatype dimType = dim.getType(); + int startSize = size.getFirst().intValue(); + int endSize = size.getSecond().intValue(); + + NativeArray start = new NativeArray(ctx, startSize, dimType); + NativeArray end = new NativeArray(ctx, endSize, dimType); + + ctx.handleError( + tiledb.tiledb_array_get_non_empty_domain_var_from_name( + ctx.getCtxp(), arrayp, name, start.toVoidPointer(), end.toVoidPointer(), emptyp)); + + return new Pair( + new String((byte[]) start.toJavaArray()), new String((byte[]) end.toJavaArray())); + } + /** * Compute an upper bound on the buffer elements needed to read a subarray. * diff --git a/src/main/java/io/tiledb/java/api/Dimension.java b/src/main/java/io/tiledb/java/api/Dimension.java index 05424e5b..039241d8 100644 --- a/src/main/java/io/tiledb/java/api/Dimension.java +++ b/src/main/java/io/tiledb/java/api/Dimension.java @@ -260,7 +260,7 @@ public Dimension setFilterList(FilterList filters) throws TileDBError { * Sets the number of values per cell for the dimension. * * @param cellValNum The number of values per cell - * @throws TileDBError + * @throws TileDBError A TileDB error */ public void setCellValNum(long cellValNum) throws TileDBError { try { @@ -275,7 +275,7 @@ public void setCellValNum(long cellValNum) throws TileDBError { * Retrieves the number of values per cell for the dimension. 
* * @return The number of values per cell - * @throws TileDBError + * @throws TileDBError A TileDB error */ public long getCellValNum() throws TileDBError { SWIGTYPE_p_unsigned_int uint = tiledb.new_uintp(); @@ -288,6 +288,16 @@ public long getCellValNum() throws TileDBError { } } + /** + * Checks whether the dimension is var-sized. + * + * @return True if the dimension is var-sized (e.g. String) and False otherwise + * @throws TileDBError A TileDB error + */ + public boolean isVar() throws TileDBError { + return this.getCellValNum() == Constants.TILEDB_VAR_NUM; + } + /** * @return A string representation of the extent. * @throws TileDBError A TileDB exception diff --git a/src/main/java/io/tiledb/java/api/Util.java b/src/main/java/io/tiledb/java/api/Util.java new file mode 100644 index 00000000..7671b6fb --- /dev/null +++ b/src/main/java/io/tiledb/java/api/Util.java @@ -0,0 +1,32 @@ +package io.tiledb.java.api; + +import java.util.Arrays; + +/** Contains helper-functions */ +public class Util { + /** + * Converts an input array of bytes to an array of Strings, according to the offsets + * + * @param offsets The offsets array + * @param data The data array + * @return The array of Strings + */ + public static String[] bytesToStrings(long[] offsets, byte[] data) { + String[] results = new String[offsets.length]; + int start = 0, end; + + // Convert bytes to string array + for (int i = 0; i < offsets.length; ++i) { + if (i < offsets.length - 1) { + end = (int) offsets[i + 1]; + results[i] = new String(Arrays.copyOfRange(data, start, end)); + start = end; + } else { + end = data.length; + results[i] = new String(Arrays.copyOfRange(data, start, end)); + } + } + + return results; + } +} diff --git a/src/test/java/io/tiledb/java/api/ArrayTest.java b/src/test/java/io/tiledb/java/api/ArrayTest.java index b26e9dc6..30f68821 100644 --- a/src/test/java/io/tiledb/java/api/ArrayTest.java +++ b/src/test/java/io/tiledb/java/api/ArrayTest.java @@ -2,6 +2,7 @@ import static 
io.tiledb.java.api.Datatype.*; import static io.tiledb.java.api.Layout.TILEDB_ROW_MAJOR; +import static io.tiledb.java.api.Layout.TILEDB_UNORDERED; import static io.tiledb.java.api.QueryType.TILEDB_READ; import static io.tiledb.java.api.QueryType.TILEDB_WRITE; @@ -17,6 +18,7 @@ public class ArrayTest { private Context ctx; private String arrayURI; + private String dimName; private String attributeName; private byte[] key; @@ -26,6 +28,7 @@ public class ArrayTest { public void setup() throws Exception { ctx = new Context(); arrayURI = temp.getRoot().toString(); + dimName = "d1"; attributeName = "a1"; String keyString = "0123456789abcdeF0123456789abcdeF"; key = keyString.getBytes(StandardCharsets.US_ASCII); @@ -46,14 +49,14 @@ private Object[] getArray(Object val) { return outputArray; } - public ArraySchema schemaCreate() throws Exception { + public ArraySchema schemaCreate(ArrayType type) throws TileDBError { Dimension d1 = new Dimension(ctx, "d1", Long.class, new Pair(1l, 4l), 2l); Domain domain = new Domain(ctx); domain.addDimension(d1); Attribute a1 = new Attribute(ctx, attributeName, Long.class); - ArraySchema schema = new ArraySchema(ctx, ArrayType.TILEDB_DENSE); + ArraySchema schema = new ArraySchema(ctx, type); schema.setTileOrder(Layout.TILEDB_ROW_MAJOR); schema.setCellOrder(Layout.TILEDB_ROW_MAJOR); schema.setDomain(domain); @@ -62,6 +65,34 @@ public ArraySchema schemaCreate() throws Exception { return schema; } + public ArraySchema schemaCreate() throws TileDBError { + return schemaCreate(ArrayType.TILEDB_DENSE); + } + + public ArraySchema schemaStringDimsCreate(ArrayType arrayType) throws Exception { + Dimension d1 = new Dimension(ctx, "d1", TILEDB_STRING_ASCII, null, null); + Domain domain = new Domain(ctx); + domain.addDimension(d1); + + ArraySchema schema = new ArraySchema(ctx, arrayType); + + schema.setDomain(domain); + schema.check(); + return schema; + } + + public void insertArbitraryValuesVarSize( + Array array, String attrName, NativeArray 
a_data, NativeArray a_offsets, Layout layout) + throws TileDBError { + // Create query + try (Query query = new Query(array, TILEDB_WRITE)) { + query.setLayout(layout).setBuffer(attrName, a_offsets, a_data); + query.submit(); + query.finalizeQuery(); + } + array.close(); + } + public void insertArbitraryValuesMeth(Array array, NativeArray a_data) throws TileDBError { // Create query try (Query query = new Query(array, TILEDB_WRITE)) { @@ -260,6 +291,78 @@ public void testArraygetNonEmptyDomainFromName() throws Exception { } } + @Test + public void testArrayGetNonEmptyDomainVarSizeFromIndex() throws Exception { + Array.create(arrayURI, schemaStringDimsCreate(ArrayType.TILEDB_SPARSE)); + NativeArray data = new NativeArray(ctx, "aabbccddee", TILEDB_STRING_ASCII); + NativeArray offsets = new NativeArray(ctx, new long[] {0, 2, 4, 6}, TILEDB_UINT64); + insertArbitraryValuesVarSize( + new Array(ctx, arrayURI, TILEDB_WRITE), dimName, data, offsets, TILEDB_UNORDERED); + + Array array = new Array(ctx, arrayURI, TILEDB_READ); + + Pair size = array.getNonEmptyDomainVarSizeFromIndex(0); + + Assert.assertEquals(2, size.getFirst().intValue()); + Assert.assertEquals(4, size.getSecond().intValue()); + } + + @Test + public void testArrayGetNonEmptyDomainVarSizeFromName() throws Exception { + Array.create(arrayURI, schemaStringDimsCreate(ArrayType.TILEDB_SPARSE)); + NativeArray data = new NativeArray(ctx, "aabbccddee", TILEDB_STRING_ASCII); + NativeArray offsets = new NativeArray(ctx, new long[] {0, 2, 4, 6}, TILEDB_UINT64); + insertArbitraryValuesVarSize( + new Array(ctx, arrayURI, TILEDB_WRITE), dimName, data, offsets, TILEDB_UNORDERED); + + Array array = new Array(ctx, arrayURI, TILEDB_READ); + + Pair size = array.getNonEmptyDomainVarSizeFromName(dimName); + + Assert.assertEquals(2, size.getFirst().intValue()); + Assert.assertEquals(4, size.getSecond().intValue()); + } + + @Test + public void testArrayGetNonEmptyDomainVarFromIndex() throws Exception { + Array.create(arrayURI, 
schemaStringDimsCreate(ArrayType.TILEDB_SPARSE)); + NativeArray data = new NativeArray(ctx, "aabbccddee", TILEDB_STRING_ASCII); + NativeArray offsets = new NativeArray(ctx, new long[] {0, 2, 4, 6}, TILEDB_UINT64); + insertArbitraryValuesVarSize( + new Array(ctx, arrayURI, TILEDB_WRITE), dimName, data, offsets, TILEDB_UNORDERED); + + Array array = new Array(ctx, arrayURI, TILEDB_READ); + + Pair size1 = array.getNonEmptyDomainVarFromIndex(0); + Pair size2 = array.getNonEmptyDomainFromIndex(0); + + Assert.assertEquals("aa", size1.getFirst()); + Assert.assertEquals("ddee", size1.getSecond()); + + Assert.assertEquals("aa", size2.getFirst()); + Assert.assertEquals("ddee", size2.getSecond()); + } + + @Test + public void testArrayGetNonEmptyDomainVarFromName() throws Exception { + Array.create(arrayURI, schemaStringDimsCreate(ArrayType.TILEDB_SPARSE)); + NativeArray data = new NativeArray(ctx, "aabbccddee", TILEDB_STRING_ASCII); + NativeArray offsets = new NativeArray(ctx, new long[] {0, 2, 4, 6}, TILEDB_UINT64); + insertArbitraryValuesVarSize( + new Array(ctx, arrayURI, TILEDB_WRITE), dimName, data, offsets, TILEDB_UNORDERED); + + Array array = new Array(ctx, arrayURI, TILEDB_READ); + + Pair size1 = array.getNonEmptyDomainVarFromName(dimName); + Pair size2 = array.getNonEmptyDomainFromName(dimName); + + Assert.assertEquals("aa", size1.getFirst()); + Assert.assertEquals("ddee", size1.getSecond()); + + Assert.assertEquals("aa", size2.getFirst()); + Assert.assertEquals("ddee", size2.getSecond()); + } + @Test public void testArrayMetadata() throws Exception { Array.create(arrayURI, schemaCreate()); diff --git a/src/test/java/io/tiledb/java/api/DimensionTest.java b/src/test/java/io/tiledb/java/api/DimensionTest.java index ca97f169..7723880e 100644 --- a/src/test/java/io/tiledb/java/api/DimensionTest.java +++ b/src/test/java/io/tiledb/java/api/DimensionTest.java @@ -66,6 +66,18 @@ public void testSetCellValNum() throws Exception { } } + @Test + public void testIsVar() throws 
Exception { + try (Context ctx = new Context(); + Dimension dim1 = + new Dimension<>(ctx, "d1", Datatype.TILEDB_INT32, new Pair<>(1, 10), 10); + Dimension dim2 = new Dimension(ctx, "d2", Datatype.TILEDB_STRING_ASCII, null, null)) { + + Assert.assertFalse(dim1.isVar()); + Assert.assertTrue(dim2.isVar()); + } + } + @Test public void testGetCellValNum() throws Exception { try (Context ctx = new Context(); diff --git a/src/test/java/io/tiledb/java/api/QueryTest.java b/src/test/java/io/tiledb/java/api/QueryTest.java index 2d14239b..74a36f3f 100644 --- a/src/test/java/io/tiledb/java/api/QueryTest.java +++ b/src/test/java/io/tiledb/java/api/QueryTest.java @@ -10,7 +10,6 @@ import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.Arrays; import java.util.HashMap; import org.junit.After; import org.junit.Assert; @@ -271,17 +270,7 @@ public void testReadStringDims() throws TileDBError { String[] results = new String[offsets.length]; int start = 0, end; - // Convert bytes to string array - for (int i = 0; i < offsets.length; ++i) { - if (i < offsets.length - 1) { - end = (int) offsets[i + 1]; - results[i] = new String(Arrays.copyOfRange(data, start, end)); - start = end; - } else { - end = data.length; - results[i] = new String(Arrays.copyOfRange(data, start, end)); - } - } + results = Util.bytesToStrings(offsets, data); Assert.assertArrayEquals(new String[] {"aa", "bb", "cc", "dd", "ee"}, results); } @@ -303,7 +292,7 @@ public void testAddRangeVar() throws TileDBError { byte[] data = (byte[]) q.getBuffer("d1"); long[] offsets = q.getVarBuffer("d1"); - Assert.assertArrayEquals(new String[] {"aa"}, bytesToStrings(offsets, data)); + Assert.assertArrayEquals(new String[] {"aa"}, Util.bytesToStrings(offsets, data)); q.close(); d_data.close(); @@ -321,7 +310,7 @@ public void testAddRangeVar() throws TileDBError { data = (byte[]) q.getBuffer("d1"); offsets = q.getVarBuffer("d1"); - Assert.assertArrayEquals(new String[] {"dd", "ee"}, 
bytesToStrings(offsets, data)); + Assert.assertArrayEquals(new String[] {"dd", "ee"}, Util.bytesToStrings(offsets, data)); // An invalid dimentions should throw an error d_data = new NativeArray(ctx, 20, Datatype.TILEDB_STRING_ASCII); @@ -423,23 +412,4 @@ public void testGetRangeVarSize() throws TileDBError { Assert.assertEquals(rangeEnd3.length(), (long) size3.getSecond()); } } - - public static String[] bytesToStrings(long[] offsets, byte[] data) { - String[] results = new String[offsets.length]; - int start = 0, end; - - // Convert bytes to string array - for (int i = 0; i < offsets.length; ++i) { - if (i < offsets.length - 1) { - end = (int) offsets[i + 1]; - results[i] = new String(Arrays.copyOfRange(data, start, end)); - start = end; - } else { - end = data.length; - results[i] = new String(Arrays.copyOfRange(data, start, end)); - } - } - - return results; - } }