Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[2.13] Support Boolean for Query Conditions #1432

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
@@ -1,6 +1,7 @@
# In Progress

## API Changes
* Support Boolean types for query conditions [#1432](https://github.com/TileDB-Inc/TileDB-Py/pull/1432)
* Support for partial consolidation using a list of fragment URIs [#1431](https://github.com/TileDB-Inc/TileDB-Py/pull/1431)

## Bug Fixes
Expand Down
11 changes: 1 addition & 10 deletions tiledb/libtiledb.pyx
Expand Up @@ -680,7 +680,7 @@ def _tiledb_cast_tile_extent(tile_extent, dtype):
cdef int _numpy_typeid(tiledb_datatype_t tiledb_dtype):
    """Return a numpy type num (int) given a tiledb_datatype_t enum value.

    The value is looked up in ``_tiledb_dtype_to_numpy_typeid_convert``. When
    no entry exists, ``np.NPY_DATETIME`` is returned for datetime types and
    ``np.NPY_NOTYPE`` for everything else.
    """
    np_id_type = _tiledb_dtype_to_numpy_typeid_convert.get(tiledb_dtype, None)
    # Compare against None explicitly: a mapped type num of 0 is falsy, so a
    # plain truthiness test would discard it and fall through to the fallback
    # below (NumPy's NPY_BOOL type number is 0).
    if np_id_type is not None:
        return np_id_type
    return np.NPY_DATETIME if _tiledb_type_is_datetime(tiledb_dtype) else np.NPY_NOTYPE

Expand Down Expand Up @@ -3681,15 +3681,6 @@ cdef class DenseArrayImpl(Array):

"""
selection_tuple = (selection,) if not isinstance(selection, tuple) else selection
if any(isinstance(s, np.ndarray) for s in selection_tuple):
warnings.warn(
"Sparse writes to dense arrays is deprecated. It is slated for removal in 0.19.0.",
DeprecationWarning,
)
assert tiledbpy_version < (0, 19, 0)
_setitem_impl_sparse(self, selection, val, dict())
return

self._setitem_impl(selection, val, dict())

def _setitem_impl(self, object selection, object val, dict nullmaps):
Expand Down
19 changes: 17 additions & 2 deletions tiledb/query_condition.py
Expand Up @@ -343,6 +343,9 @@ def get_value_from_node(self, node: QueryConditionNodeElem) -> Any:

if isinstance(value_node, ast.Constant):
value = value_node.value
elif isinstance(value_node, ast.NameConstant):
# deprecated in 3.8
value = value_node.value
elif isinstance(value_node, ast.Num):
# deprecated in 3.8
value = value_node.n
Expand All @@ -365,19 +368,27 @@ def cast_value_to_dtype(
# casted to numeric types
if isinstance(value, str):
raise TileDBError(f"Cannot cast `{value}` to {dtype}.")

if np.issubdtype(dtype, np.datetime64):
cast = getattr(np, "int64")
elif np.issubdtype(dtype, bool):
cast = getattr(np, "uint8")
else:
cast = getattr(np, dtype)

value = cast(value)

except ValueError:
raise TileDBError(f"Cannot cast `{value}` to {dtype}.")

return value

def init_pyqc(self, pyqc: PyQueryCondition, dtype: str) -> Callable:
if dtype != "string" and np.issubdtype(dtype, np.datetime64):
dtype = "int64"
if dtype != "string":
if np.issubdtype(dtype, np.datetime64):
dtype = "int64"
elif np.issubdtype(dtype, bool):
dtype = "uint8"

init_fn_name = f"init_{dtype}"

Expand Down Expand Up @@ -466,3 +477,7 @@ def visit_Str(self, node: ast.Str) -> ast.Str:
def visit_Bytes(self, node: ast.Bytes) -> ast.Bytes:
    """Return the ``ast.Bytes`` node unchanged.

    ``ast.Bytes`` is deprecated as of Python 3.8; this visitor is kept for
    interpreters that still emit it.
    """
    return node

def visit_NameConstant(self, node: ast.NameConstant) -> ast.NameConstant:
    """Return the ``ast.NameConstant`` node unchanged.

    ``ast.NameConstant`` is deprecated as of Python 3.8; this visitor is kept
    for interpreters that still emit it.
    """
    return node
63 changes: 63 additions & 0 deletions tiledb/tests/test_query_condition.py
Expand Up @@ -723,6 +723,69 @@ def test_do_not_return_attrs(self):
assert "D" in A.query(cond=cond, attrs=None).multi_index[:]
assert "D" not in A.query(cond=cond, attrs=[]).multi_index[:]

def test_boolean_sparse(self):
    """Exercise query conditions on boolean attributes of a sparse array.

    Creates a 1-D sparse array with three ``np.bool_`` attributes, fills all
    ten cells with random 0/1 values, and checks that ``== True`` /
    ``== False`` conditions (alone and joined with ``and``) only return cells
    whose attribute values satisfy the condition.
    """
    uri = self.path("test_boolean_sparse")
    names = ("a", "b", "c")

    dim = tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)
    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(dim),
        attrs=[tiledb.Attr(name=name, dtype=np.bool_) for name in names],
        sparse=True,
    )
    tiledb.Array.create(uri, schema)

    with tiledb.open(uri, "w") as writer:
        coords = np.arange(1, 11)
        # One random 0/1 vector per attribute, drawn in attribute order.
        writer[coords] = {
            name: np.random.randint(0, high=2, size=10) for name in names
        }

    with tiledb.open(uri) as A:
        res = A.query(cond="a == True")[:]
        assert all(res["a"])

        res = A.query(cond="a == False")[:]
        assert all(~res["a"])

        res = A.query(cond="a == True and b == True")[:]
        for name in ("a", "b"):
            assert all(res[name])

        res = A.query(cond="a == False and c == True")[:]
        assert all(~res["a"])
        assert all(res["c"])

def test_boolean_dense(self):
    """Exercise query conditions on boolean attributes of a dense array.

    Creates a 1-D dense array with three ``np.bool_`` attributes, fills it
    with random 0/1 values, and checks ``== True`` conditions (alone and
    joined with ``and``). Results are passed through ``self.filter_dense``
    together with attribute ``a``'s fill value before asserting -- presumably
    to drop cells that did not match the condition; confirm against the
    helper's definition.
    """
    uri = self.path("test_boolean_dense")
    names = ("a", "b", "c")

    dim = tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)
    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(dim),
        attrs=[tiledb.Attr(name=name, dtype=np.bool_) for name in names],
        sparse=False,
    )
    tiledb.Array.create(uri, schema)

    with tiledb.open(uri, "w") as writer:
        # One random 0/1 vector per attribute, drawn in attribute order.
        writer[:] = {
            name: np.random.randint(0, high=2, size=10) for name in names
        }

    with tiledb.open(uri) as A:
        # The fill value of attribute "a" is used as the mask for every check.
        fill = A.attr("a").fill

        res = A.query(cond="a == True")[:]
        assert all(self.filter_dense(res["a"], fill))

        res = A.query(cond="a == True and b == True")[:]
        for name in ("a", "b"):
            assert all(self.filter_dense(res[name], fill))


class QueryDeleteTest(DiskTestCase):
def test_basic_sparse(self):
Expand Down