diff --git a/HISTORY.md b/HISTORY.md index 8fb0a7e89e..0deb439b0c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,8 @@ # In Progress +## Improvements +* Move `Dim` and `Domain` from Cython to pure Python [#1327](https://github.com/TileDB-Inc/TileDB-Py/pull/1327) + ## Bug Fixes * Ensure NumPy array matches array schema dimensions for dense writes [#1514](https://github.com/TileDB-Inc/TileDB-Py/pull/1514) @@ -104,6 +107,7 @@ ## Improvements * Move `Attr` from Cython to pure Python [#1326](https://github.com/TileDB-Inc/TileDB-Py/pull/1326) +* Move `Domain` and `Dim` from Cython to pure Python [#1327](https://github.com/TileDB-Inc/TileDB-Py/pull/1327) ## API Changes * Permit true-ASCII attributes in non-from-pandas dataframes [#1337](https://github.com/TileDB-Inc/TileDB-Py/pull/1337) diff --git a/tiledb/cc/attribute.cc b/tiledb/cc/attribute.cc index 95c714912c..f2d9296992 100644 --- a/tiledb/cc/attribute.cc +++ b/tiledb/cc/attribute.cc @@ -41,14 +41,14 @@ py::array get_fill_value(Attribute &attr) { void init_attribute(py::module &m) { py::class_(m, "Attribute") - .def(py::init(), - py::keep_alive<1, 2>()) + .def(py::init()) + + .def(py::init()) .def( - py::init(), - py::keep_alive<1, 2>()) + py::init()) - .def(py::init(), py::keep_alive<1, 2>()) + .def(py::init()) .def("__capsule__", [](Attribute &attr) { diff --git a/tiledb/cc/domain.cc b/tiledb/cc/domain.cc index 86eb527564..c0a7c33026 100644 --- a/tiledb/cc/domain.cc +++ b/tiledb/cc/domain.cc @@ -14,57 +14,196 @@ using namespace tiledbpy::common; namespace py = pybind11; void init_domain(py::module &m) { - py::class_(m, "Dimension") - .def("create", - [](const Context &ctx, const std::string &name, - tiledb_datatype_t datatype, py::buffer range, py::buffer extent) { - auto range_info = range.request(); - auto extent_info = extent.request(); - if (datatype != TILEDB_STRING_ASCII) { - if (!expect_buffer_nbytes(range_info, datatype, 2)) { - throw py::value_error( - "Unexpected type/shape for range buffer!"); - } - if (!expect_buffer_nbytes(extent_info, datatype, 1)) { - throw py::value_error( - "Unexpected type/shape for range buffer!"); - } + py::class_(m, "Dimension") + .def(py::init()) + + .def(py::init([](const Context &ctx, const std::string &name, + tiledb_datatype_t datatype, py::object domain, + py::object tile_extent) { + void *dim_dom = nullptr; + void *dim_tile = nullptr; + + if (!domain.is_none()) { + py::buffer domain_buffer = py::buffer(domain); + py::buffer_info domain_info = domain_buffer.request(); + dim_dom = domain_info.ptr; + } + + if (!tile_extent.is_none()) { + py::buffer tile_buffer = py::buffer(tile_extent); + py::buffer_info tile_extent_info = tile_buffer.request(); + dim_tile = tile_extent_info.ptr; } - const void *range_data = - (datatype != TILEDB_STRING_ASCII) ? range_info.ptr : nullptr; - const void *extent_data = - (datatype != TILEDB_STRING_ASCII) ? extent_info.ptr : nullptr; + return std::make_unique( + Dimension::create(ctx, name, datatype, dim_dom, dim_tile)); + }), + py::keep_alive<1, 2>()) - return std::make_unique(Dimension::create( - ctx, name, datatype, range_data, extent_data)); - }) - .def_property_readonly("name", &Dimension::name) - // .def_property_readonly("domain", &Dimension::domain) - // .def_property_readonly("tile", &Dimension::tile_extent) - .def_property("filters", &Dimension::filter_list, + .def(py::init(), py::keep_alive<1, 2>()) + + .def_property_readonly("_name", &Dimension::name) + + .def_property_readonly( + "_domain", + [](Dimension &dim) { + switch (dim.type()) { + case TILEDB_UINT64: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_INT64: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_UINT32: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_INT32: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_UINT16: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_INT16: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_UINT8: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_INT8: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_FLOAT64: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_FLOAT32: { + auto dom = dim.domain(); + return py::make_tuple(dom.first, dom.second); + } + case TILEDB_STRING_ASCII: { + return py::make_tuple("", ""); + } + default: + TPY_ERROR_LOC("Unsupported dtype for Dimension's domain"); + } + }) + + .def_property_readonly( + "_tile", + [](Dimension &dim) -> py::object { + switch (dim.type()) { + case TILEDB_UINT64: { + return py::cast(dim.tile_extent()); + } + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_INT64: { + return py::cast(dim.tile_extent()); + } + case TILEDB_UINT32: { + return py::cast(dim.tile_extent()); + } + case TILEDB_INT32: { + return py::cast(dim.tile_extent()); + } + case TILEDB_UINT16: { + return py::cast(dim.tile_extent()); + } + case TILEDB_INT16: { + return py::cast(dim.tile_extent()); + } + case TILEDB_UINT8: { + return py::cast(dim.tile_extent()); + } + case TILEDB_INT8: { + return py::cast(dim.tile_extent()); + } + case TILEDB_FLOAT64: { + return py::cast(dim.tile_extent()); + } + case TILEDB_FLOAT32: { + return py::cast(dim.tile_extent()); + } + case TILEDB_STRING_ASCII: { + return py::none(); + } + default: + TPY_ERROR_LOC("Unsupported dtype for Dimension's tile extent"); + } + }) + + .def_property("_filters", &Dimension::filter_list, &Dimension::set_filter_list) - .def_property("ncell", &Dimension::cell_val_num, + + .def_property("_ncell", &Dimension::cell_val_num, &Dimension::set_cell_val_num) - .def("tiledb_datatype", &Dimension::type) - // TODO needs numpy <> tiledb type and void*+(type,size) -> numpy - // translators - .def("domain_to_str", &Dimension::domain_to_str); - py::class_(m, "Domain") - .def(py::init(), - py::keep_alive<1, 2>() /* ArraySchema keeps Context alive */) + .def_property_readonly("_tiledb_dtype", &Dimension::type) + + .def("_domain_to_str", &Dimension::domain_to_str); + + py::class_(m, "Domain") + .def(py::init()) - .def_property_readonly("ncell", + .def(py::init()) + + .def(py::init()) + + .def("__capsule__", + [](Domain &dom) { + return py::capsule(dom.ptr().get(), "dom", nullptr); + }) + + .def_property_readonly("_ncell", [](Domain &dom) { return dom.cell_num(); }) - .def_property_readonly("dtype", &Domain::type) - .def_property_readonly("ndim", &Domain::ndim) - .def_property_readonly("dims", &Domain::dimensions) - - .def("dim", py::overload_cast(&Domain::dimension, py::const_)) - .def("dim", py::overload_cast(&Domain::dimension, - py::const_)) - .def("add_dim", &Domain::add_dimension); + + .def_property_readonly("_tiledb_dtype", &Domain::type) + + .def_property_readonly("_ndim", &Domain::ndim) + + .def_property_readonly("_dims", &Domain::dimensions) + + .def("_dim", py::overload_cast(&Domain::dimension, py::const_)) + .def("_dim", py::overload_cast(&Domain::dimension, + py::const_)) + + .def("_has_dim", &Domain::has_dimension) + + .def("_add_dim", &Domain::add_dimension, py::keep_alive<1, 2>()) + + .def("_dump", [](Domain &dom) { dom.dump(); }); } } // namespace libtiledbcpp diff --git a/tiledb/cc/filter.cc b/tiledb/cc/filter.cc index fd5175d226..f7d22adb9f 100644 --- a/tiledb/cc/filter.cc +++ b/tiledb/cc/filter.cc @@ -16,8 +16,7 @@ namespace py = pybind11; void init_filter(py::module &m) { py::class_(m, "Filter") - .def(py::init(), - py::keep_alive<1, 2>()) + .def(py::init()) .def_property_readonly("_type", &Filter::filter_type) @@ -111,8 +110,8 @@ void init_filter(py::module &m) { py::class_(m, "FilterList") .def(py::init()) - .def(py::init(), py::keep_alive<1, 2>()) - .def(py::init(), py::keep_alive<1, 2>()) + .def(py::init()) + .def(py::init()) .def("__capsule__", [](FilterList &filterlist) { diff --git a/tiledb/core.cc b/tiledb/core.cc index 234f1523db..66b427de6a 100644 --- a/tiledb/core.cc +++ b/tiledb/core.cc @@ -1628,8 +1628,7 @@ void init_core(py::module &m) { auto pq = py::class_(m, "PyQuery") .def(py::init(), - py::keep_alive<1, 2>()) + py::object, py::object>()) .def("buffer_dtype", &PyQuery::buffer_dtype) .def("results", &PyQuery::results) .def("set_ranges", &PyQuery::set_ranges) diff --git a/tiledb/dimension.py b/tiledb/dimension.py new file mode 100644 index 0000000000..7e0aacf6d0 --- /dev/null +++ b/tiledb/dimension.py @@ -0,0 +1,358 @@ +import io +from typing import Any, Sequence, Tuple, TYPE_CHECKING, Union +import numpy as np + +import tiledb.cc as lt +from .ctx import default_ctx +from .filter import FilterList, Filter +from .util import ( + dtype_to_tiledb, + numpy_dtype, + tiledb_type_is_integer, + tiledb_type_is_datetime, +) + +if TYPE_CHECKING: + from .libtiledb import Ctx + + +def dtype_range(dtype: np.dtype) -> Tuple[Any, Any]: + """Return the range of a Numpy dtype""" + + if np.issubdtype(dtype, np.integer): + info = np.iinfo(dtype) + dtype_min, dtype_max = info.min, info.max + elif np.issubdtype(dtype, np.floating): + info = np.finfo(dtype) + dtype_min, dtype_max = info.min, info.max + elif dtype.kind == "M": + info = np.iinfo(np.int64) + date_unit = np.datetime_data(dtype)[0] + # +1 to exclude NaT + dtype_min = np.datetime64(info.min + 1, date_unit) + dtype_max = np.datetime64(info.max, date_unit) + else: + raise TypeError(f"invalid Dim dtype {dtype!r}") + return (dtype_min, dtype_max) + + +def _tiledb_cast_tile_extent(tile_extent: Any, dtype: np.dtype) -> np.array: + """Given a tile extent value, cast it to np.array of the given numpy dtype.""" + # Special handling for datetime domains + if dtype.kind == "M": + date_unit = np.datetime_data(dtype)[0] + if isinstance(tile_extent, np.timedelta64): + extent_value = int(tile_extent / np.timedelta64(1, date_unit)) + tile_size_array = np.array(np.int64(extent_value), dtype=np.int64) + else: + tile_size_array = np.array(tile_extent, dtype=np.int64) + else: + tile_size_array = np.array(tile_extent, dtype=dtype) + + if tile_size_array.size != 1: + raise ValueError("tile extent must be a scalar") + + return tile_size_array + + +def _tiledb_cast_domain( + domain, tiledb_dtype: lt.DataType +) -> Tuple[np.generic, np.generic]: + np_dtype = numpy_dtype(tiledb_dtype) + + if tiledb_type_is_datetime(tiledb_dtype): + date_unit = np.datetime_data(np_dtype)[0] + return ( + np.datetime64(domain[0], date_unit), + np.datetime64(domain[1], date_unit), + ) + + if tiledb_dtype in ( + lt.DataType.STRING_ASCII, + lt.DataType.STRING_UTF8, + lt.DataType.BLOB, + ): + return domain + + return (np_dtype(domain[0]), np_dtype(domain[1])) + + +class Dim(lt.Dimension): + """ + Represents a TileDB dimension. + """ + + def __init__( + self, + name: str = "__dim_0", + domain: Tuple[Any, Any] = None, + tile: Any = None, + filters: Union[FilterList, Sequence[Filter]] = None, + dtype: np.dtype = np.uint64, + var: bool = None, + ctx: "Ctx" = None, + _lt_obj: lt.Dimension = None, + ): + """Class representing a dimension of a TileDB Array. + + :param str name: the dimension name, empty if anonymous + :param domain: + :type domain: tuple(int, int) or tuple(float, float) + :param tile: Tile extent + :type tile: int or float + :param filters: List of filters to apply + :type filters: FilterList + :dtype: the Dim numpy dtype object, type object, or string \ + that can be corerced into a numpy dtype object + :raises ValueError: invalid domain or tile extent + :raises TypeError: invalid domain, tile extent, or dtype type + :raises: :py:exc:`TileDBError` + :param tiledb.Ctx ctx: A TileDB Context + + """ + self._ctx = ctx or default_ctx() + + if _lt_obj is not None: + return super().__init__(_lt_obj) + + if var is not None: + if var and np.dtype(dtype) not in (np.str_, np.bytes_): + raise TypeError("'var=True' specified for non-str/bytes dtype") + + if domain is not None and len(domain) != 2: + raise ValueError("invalid domain extent, must be a pair") + + domain_array = None + tile_size_array = None + + if (isinstance(dtype, str) and dtype == "ascii") or np.dtype(dtype).kind == "S": + # Handle var-len dom type (currently only TILEDB_STRING_ASCII) + # The dims's dom is implicitly formed as coordinates are written. + dim_datatype = lt.DataType.STRING_ASCII + else: + if dtype is not None: + dtype = np.dtype(dtype) + dtype_min, dtype_max = dtype_range(dtype) + + if domain == (None, None): + # this means to use the full extent of the type + domain = (dtype_min, dtype_max) + elif ( + domain[0] < dtype_min + or domain[0] > dtype_max + or domain[1] < dtype_min + or domain[1] > dtype_max + ): + raise TypeError( + "invalid domain extent, domain cannot be safely" + f" cast to dtype {dtype!r}" + ) + + domain_array = np.asarray(domain, dtype=dtype) + domain_dtype = domain_array.dtype + dim_datatype = dtype_to_tiledb(domain_dtype) + + # check that the domain type is a valid dtype (integer / floating) + if ( + not np.issubdtype(domain_dtype, np.integer) + and not np.issubdtype(domain_dtype, np.floating) + and not domain_dtype.kind == "M" + ): + raise TypeError(f"invalid Dim dtype {domain_dtype!r}") + + if tiledb_type_is_datetime(dim_datatype): + domain_array = domain_array.astype(dtype=np.int64) + + # if the tile extent is specified, cast + if tile is not None: + tile_size_array = _tiledb_cast_tile_extent(tile, domain_dtype) + if tile_size_array.size != 1: + raise ValueError("tile extent must be a scalar") + + super().__init__(self._ctx, name, dim_datatype, domain_array, tile_size_array) + + if filters is not None: + if isinstance(filters, FilterList): + self._filters = filters + else: + self._filters = FilterList(filters) + + def __repr__(self) -> str: + filters_str = "" + if self.filters: + filters_str = ", filters=FilterList([" + for f in self.filters: + filters_str += repr(f) + ", " + filters_str += "])" + + # for consistency, print `var=True` for string-like types + varlen = "" if not self.dtype in (np.str_, np.bytes_) else ", var=True" + return "Dim(name={0!r}, domain={1!s}, tile={2!r}, dtype='{3!s}'{4}{5})".format( + self.name, self.domain, self.tile, self.dtype, varlen, filters_str + ) + + def _repr_html_(self) -> str: + output = io.StringIO() + + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write(self._repr_html_row_only_()) + output.write("
NameDomainTileData TypeIs Var-LenFilters
") + + return output.getvalue() + + def _repr_html_row_only_(self) -> str: + output = io.StringIO() + + output.write("") + output.write(f"{self.name}") + output.write(f"{self.domain}") + output.write(f"{self.tile}") + output.write(f"{self.dtype}") + output.write(f"{self.dtype in (np.str_, np.bytes_)}") + output.write(f"{self.filters._repr_html_()}") + output.write("") + + return output.getvalue() + + def __len__(self) -> int: + return self.size + + def __eq__(self, other) -> bool: + if not isinstance(other, Dim): + return False + if ( + self.name != other.name + or self.domain != other.domain + or self.tile != other.tile + or self.dtype != other.dtype + ): + return False + return True + + def __array__(self, dtype=None, **kw) -> np.array: + if not self._integer_domain(): + raise TypeError( + "conversion to numpy ndarray only valid for integer dimension domains" + ) + lb, ub = self.domain + return np.arange(int(lb), int(ub) + 1, dtype=dtype if dtype else self.dtype) + + @property + def dtype(self) -> np.dtype: + """Numpy dtype representation of the dimension type. + + :rtype: numpy.dtype + + """ + return np.dtype(numpy_dtype(self._tiledb_dtype)) + + @property + def name(self) -> str: + """The dimension label string. + + Anonymous dimensions return a default string representation based on the dimension index. + + :rtype: str + + """ + return self._name + + @property + def isvar(self) -> bool: + """True if the dimension is variable length + + :rtype: bool + :raises: :py:exc:`tiledb.TileDBError` + + """ + return self._ncell == lt.TILEDB_VAR_NUM() + + @property + def isanon(self) -> bool: + """True if the dimension is anonymous + + :rtype: bool + + """ + return self.name == "" or self.name.startswith("__dim") + + @property + def filters(self) -> FilterList: + """FilterList of the TileDB dimension + + :rtype: tiledb.FilterList + :raises: :py:exc:`tiledb.TileDBError` + + """ + return FilterList(_lt_obj=self._filters) + + @property + def shape(self) -> Tuple["np.generic", "np.generic"]: + """The shape of the dimension given the dimension's domain. + + **Note**: The shape is only valid for integer and datetime dimension domains. + + :rtype: tuple(numpy scalar, numpy scalar) + :raises TypeError: floating point (inexact) domain + + """ + if not tiledb_type_is_integer( + self._tiledb_dtype + ) and not tiledb_type_is_datetime(self._tiledb_dtype): + raise TypeError( + "shape only valid for integer and datetime dimension domains" + ) + return ((self._domain[1] - self._domain[0] + 1),) + + @property + def size(self) -> int: + """The size of the dimension domain (number of cells along dimension). + + :rtype: int + :raises TypeError: floating point (inexact) domain + + """ + if not tiledb_type_is_integer(self._tiledb_dtype): + raise TypeError("size only valid for integer dimension domains") + return int(self.shape[0]) + + @property + def tile(self) -> np.generic: + """The tile extent of the dimension. + + :rtype: numpy scalar or np.timedelta64 + + """ + np_dtype = numpy_dtype(self._tiledb_dtype) + + if tiledb_type_is_datetime(self._tiledb_dtype): + date_unit = np.datetime_data(self.dtype)[0] + return np.timedelta64(self._tile, date_unit) + + if self._tiledb_dtype in ( + lt.DataType.STRING_ASCII, + lt.DataType.STRING_UTF8, + lt.DataType.BLOB, + ): + return self._tile + + return np_dtype(self._tile) + + @property + def domain(self) -> Tuple["np.generic", "np.generic"]: + """The dimension (inclusive) domain. + + The dimension's domain is defined by a (lower bound, upper bound) tuple. + + :rtype: tuple(numpy scalar, numpy scalar) + + """ + return _tiledb_cast_domain(self._domain, self._tiledb_dtype) diff --git a/tiledb/domain.py b/tiledb/domain.py new file mode 100644 index 0000000000..3fa43bc255 --- /dev/null +++ b/tiledb/domain.py @@ -0,0 +1,218 @@ +import numpy as np +from typing import TYPE_CHECKING + +import tiledb.cc as lt +from .ctx import default_ctx +from .dimension import Dim +from .util import numpy_dtype + +import io + +if TYPE_CHECKING: + from .libtiledb import Ctx + + +class Domain(lt.Domain): + """ + Represents a TileDB domain. + """ + + def __init__( + self, + *dims: Dim, + ctx: "Ctx" = None, + _lt_obj: lt.Domain = None, + _capsule: "PyCapsule" = None, + ): + """Class representing the domain of a TileDB Array. + + :param *dims*: one or more tiledb.Dim objects up to the Domain's ndim + :raises TypeError: All dimensions must have the same dtype + :raises: :py:exc:`TileDBError` + :param tiledb.Ctx ctx: A TileDB Context + + """ + self._ctx = ctx or default_ctx() + + if _capsule is not None: + return super().__init__(self._ctx, _capsule) + + if _lt_obj is not None: + return super().__init__(_lt_obj) + + super().__init__(self._ctx) + + # support passing a list of dims without splatting + if len(dims) == 1 and isinstance(dims[0], list): + dims = dims[0] + + if len(dims) == 0: + raise lt.TileDBError("Domain must have ndim >= 1") + + if len(dims) > 1: + if all(dim.name == "__dim_0" for dim in dims): + + def clone_dim_with_name(dim, name): + return Dim( + name=name, + domain=dim.domain, + tile=dim.tile, + dtype=dim.dtype, + ctx=dim._ctx, + ) + + # rename anonymous dimensions sequentially + dims = [ + clone_dim_with_name(dims[i], name=f"__dim_{i}") + for i in range(len(dims)) + ] + elif any(dim.name.startswith("__dim_0") for dim in dims[1:]): + raise lt.TileDBError( + "Mixed dimension naming: dimensions must be either all anonymous or all named." + ) + + for d in dims: + if not isinstance(d, Dim): + raise TypeError( + "Cannot create Domain with non-Dim value for 'dims' argument" + ) + self._add_dim(d) + + def __repr__(self): + dims = ",\n ".join([repr(self.dim(i)) for i in range(self.ndim)]) + return "Domain({0!s})".format(dims) + + def _repr_html_(self) -> str: + output = io.StringIO() + + output.write("") + + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + output.write("") + for i in range(self.ndim): + output.write(self.dim(i)._repr_html_row_only_()) + output.write("
NameDomainTileData TypeIs Var-lengthFilters
") + + return output.getvalue() + + def __len__(self): + """Returns the number of dimensions of the domain""" + return self._ndim + + def __iter__(self): + """Returns a generator object that iterates over the domain's dimension objects""" + return (Dim(_lt_obj=self._dim(i)) for i in range(self.ndim)) + + def __eq__(self, other): + """Returns true if Domain is equal to self. + + :rtype: bool + """ + if not isinstance(other, Domain): + return False + + same_dtype = self._is_homogeneous() + + if same_dtype and self.shape != other.shape: + return False + + ndim = self.ndim + if ndim != other.ndim: + return False + + for i in range(ndim): + if self.dim(i) != other.dim(i): + return False + return True + + @property + def ndim(self): + """The number of dimensions of the domain. + + :rtype: int + + """ + return self._ndim + + @property + def dtype(self): + """The numpy dtype of the domain's dimension type. + + :rtype: numpy.dtype + + """ + return np.dtype(numpy_dtype(self._tiledb_dtype)) + + @property + def shape(self): + """The domain's shape, valid only for integer domains. + + :rtype: tuple + :raises TypeError: floating point (inexact) domain + + """ + return tuple(dim.shape[0] for dim in self) + + @property + def size(self): + """The domain's size (number of cells), valid only for integer domains. + + :rtype: int + :raises TypeError: floating point (inexact) domain + + """ + if self._tiledb_dtype not in ( + lt.DataType.UINT8, + lt.DataType.INT8, + lt.DataType.UINT16, + lt.DataType.INT16, + lt.DataType.UINT32, + lt.DataType.INT32, + lt.DataType.UINT64, + lt.DataType.INT64, + ): + raise TypeError("shape valid only for integer domains") + return np.product(self.shape) + + def _is_homogeneous(self): + dtype0 = self.dim(0).dtype + return all(self.dim(i).dtype == dtype0 for i in range(1, self.ndim)) + + @property + def homogeneous(self): + """Returns True if the domain's dimension types are homogeneous.""" + return self._is_homogeneous() + + def dim(self, dim_id): + """Returns a Dim object from the domain given the dimension's index or name. + + :param dim_d: dimension index (int) or name (str) + :raises: :py:exc:`tiledb.TileDBError` + + """ + if not isinstance(dim_id, (int, str)): + raise ValueError( + f"Unsupported dim identifier: '{dim_id!r}' (expected int or str)" + ) + + return Dim(_lt_obj=self._dim(dim_id)) + + def has_dim(self, name): + """ + Returns true if the Domain has a Dimension with the given name + + :param name: name of Dimension + :rtype: bool + :return: + """ + return self._has_dim(name) + + def dump(self): + """Dumps a string representation of the domain object to standard output (STDOUT)""" + self._dump() diff --git a/tiledb/filter.py b/tiledb/filter.py index 17a7ba8b31..355345bea2 100644 --- a/tiledb/filter.py +++ b/tiledb/filter.py @@ -800,8 +800,8 @@ def __init__( filters: Sequence[Filter] = None, chunksize: int = None, ctx: "Ctx" = None, - _lt_obj=None, - _capsule=None, + _lt_obj: lt.FilterList = None, + _capsule: "PyCapsule" = None, ): self._ctx = ctx or default_ctx() diff --git a/tiledb/libtiledb.pxd b/tiledb/libtiledb.pxd index bcad3a0efc..206d14d6e5 100644 --- a/tiledb/libtiledb.pxd +++ b/tiledb/libtiledb.pxd @@ -1167,30 +1167,6 @@ cdef tiledb_datatype_t _tiledb_dtype_datetime(np.dtype dtype) except? TILEDB_DAT # # ############################################################################### -cdef class Dim(object): - cdef object ctx - cdef tiledb_dimension_t* ptr - - @staticmethod - cdef from_ptr(const tiledb_dimension_t* ptr, object ctx=*) - - cdef tiledb_datatype_t _get_type(Dim self) except? TILEDB_CHAR - cdef unsigned int _cell_val_num(Dim self) except? 0 - cdef _integer_domain(self) - cdef _datetime_domain(self) - cdef _shape(self) - -cdef class Domain(object): - cdef object ctx - cdef tiledb_domain_t* ptr - - @staticmethod - cdef from_ptr(const tiledb_domain_t* ptr, object ctx=*) - cdef tiledb_datatype_t _get_type(Domain self) except? TILEDB_CHAR - cdef _integer_domain(Domain self) - cdef _is_homogeneous(Domain self) - cdef _shape(Domain self) - cdef class ArraySchema(object): cdef object ctx cdef tiledb_array_schema_t* ptr diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index 11270596d4..2a6626247e 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -15,6 +15,8 @@ from collections.abc import Sequence from .attribute import Attr from .ctx import default_ctx, Ctx, Config +from .dimension import Dim +from .domain import Domain from .filter import FilterList from .vfs import VFS from .version import version_tuple as tiledbpy_version @@ -322,8 +324,8 @@ cdef _write_array(tiledb_ctx_t* ctx_ptr, cdef np.ndarray s_end cdef void* s_start_ptr = NULL cdef void* s_end_ptr = NULL - cdef Domain dom = None - cdef Dim dim = None + # cdef Domain dom = None + # cdef Dim dim = None cdef np.dtype dim_dtype = None cdef tiledb_subarray_t* subarray_ptr = NULL if not issparse: @@ -672,24 +674,6 @@ cdef tiledb_datatype_t _tiledb_dtype_datetime(np.dtype dtype) except? TILEDB_DAT raise TypeError("np type is not a datetime {0!r}".format(date_unit)) return tdb_dt -def _tiledb_cast_tile_extent(tile_extent, dtype): - """Given a tile extent value, cast it to np.array of the given numpy dtype.""" - # Special handling for datetime domains - if dtype.kind == 'M': - date_unit = np.datetime_data(dtype)[0] - if isinstance(tile_extent, np.timedelta64): - extent_value = int(tile_extent / np.timedelta64(1, date_unit)) - tile_size_array = np.array(np.int64(extent_value), dtype=np.int64) - else: - tile_size_array = np.array(tile_extent, dtype=dtype) - else: - tile_size_array = np.array(tile_extent, dtype=dtype) - - if tile_size_array.size != 1: - raise ValueError("tile extent must be a scalar") - return tile_size_array - - cdef int _numpy_typeid(tiledb_datatype_t tiledb_dtype): """Return a numpy type num (int) given a tiledb_datatype_t enum value.""" np_id_type = _tiledb_dtype_to_numpy_typeid_convert.get(tiledb_dtype, None) @@ -782,661 +766,6 @@ cdef unicode _tiledb_layout_string(tiledb_layout_t order): return tiledb_order_to_string[order] -cdef class Dim(object): - """Class representing a dimension of a TileDB Array. - - :param str name: the dimension name, empty if anonymous - :param domain: - :type domain: tuple(int, int) or tuple(float, float) - :param tile: Tile extent - :type tile: int or float - :param filters: List of filters to apply - :type filters: FilterList - :dtype: the Dim numpy dtype object, type object, or string \ - that can be corerced into a numpy dtype object - :raises ValueError: invalid domain or tile extent - :raises TypeError: invalid domain, tile extent, or dtype type - :raises: :py:exc:`TileDBError` - :param tiledb.Ctx ctx: A TileDB Context - - """ - - def __cinit__(self): - self.ptr = NULL - - def __dealloc__(self): - if self.ptr != NULL: - tiledb_dimension_free(&self.ptr) - - @staticmethod - cdef from_ptr(const tiledb_dimension_t* ptr, ctx=None): - if not ctx: - ctx = default_ctx() - assert(ptr != NULL) - cdef Dim dim = Dim.__new__(Dim) - dim.ctx = ctx - # need to cast away the const - dim.ptr = ptr - return dim - - def __init__(self, name=u"__dim_0", domain=None, tile=None, - filters=None, dtype=np.uint64, var=None, ctx=None): - if not ctx: - ctx = default_ctx() - - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(ctx) - - if var is not None: - if var and np.dtype(dtype) not in (np.str_, np.bytes_): - raise TypeError("'var=True' specified for non-str/bytes dtype") - - if domain is not None and len(domain) != 2: - raise ValueError('invalid domain extent, must be a pair') - - # argument conversion - cdef bytes bname = ustring(name).encode('UTF-8') - cdef const char* name_ptr = PyBytes_AS_STRING(bname) - cdef tiledb_datatype_t dim_datatype - cdef const void* domain_ptr = NULL - cdef tiledb_dimension_t* dim_ptr = NULL - cdef void* tile_size_ptr = NULL - cdef np.dtype domain_dtype - - if ((isinstance(dtype, str) and dtype == "ascii") or - dtype == np.dtype('S')): - # Handle var-len domain type - # (currently only TILEDB_STRING_ASCII) - # The dimension's domain is implicitly formed as - # coordinates are written. - dim_datatype = TILEDB_STRING_ASCII - else: - if domain is None or len(domain) != 2: - raise ValueError('invalid domain extent, must be a pair') - - if dtype is not None: - dtype = np.dtype(dtype) - dtype_min, dtype_max = dtype_range(dtype) - - if domain == (None, None): - # this means to use the full extent of the type - domain = (dtype_min, dtype_max) - elif (domain[0] < dtype_min or domain[0] > dtype_max or - domain[1] < dtype_min or domain[1] > dtype_max): - raise TypeError( - "invalid domain extent, domain cannot be safely cast to dtype {0!r}".format(dtype)) - - domain_array = np.asarray(domain, dtype=dtype) - domain_ptr = np.PyArray_DATA(domain_array) - domain_dtype = domain_array.dtype - dim_datatype = dtype_to_tiledb(domain_dtype) - # check that the domain type is a valid dtype (integer / floating) - if (not np.issubdtype(domain_dtype, np.integer) and - not np.issubdtype(domain_dtype, np.floating) and - not domain_dtype.kind == 'M'): - raise TypeError("invalid Dim dtype {0!r}".format(domain_dtype)) - # if the tile extent is specified, cast - if tile is not None: - tile_size_array = _tiledb_cast_tile_extent(tile, domain_dtype) - if tile_size_array.size != 1: - raise ValueError("tile extent must be a scalar") - tile_size_ptr = np.PyArray_DATA(tile_size_array) - - cdef tiledb_filter_list_t* filter_list_ptr = NULL - try: - check_error(ctx, - tiledb_dimension_alloc(ctx_ptr, - name_ptr, - dim_datatype, - domain_ptr, - tile_size_ptr, - &dim_ptr)) - - assert dim_ptr != NULL, "internal error: tiledb_dimension_alloc null dim_ptr" - - if filters is not None: - filter_list = filters - if not isinstance(filters, FilterList): - filter_list = FilterList(filters, ctx=ctx) - filter_list_ptr = PyCapsule_GetPointer( - filter_list.__capsule__(), "fl") - check_error(ctx, - tiledb_dimension_set_filter_list(ctx_ptr, dim_ptr, filter_list_ptr)) - except: - raise - - self.ctx = ctx - self.ptr = dim_ptr - - def __repr__(self): - filters_str = "" - if self.filters: - filters_str = ", filters=FilterList([" - for f in self.filters: - filters_str += repr(f) + ", " - filters_str += "])" - - # for consistency, print `var=True` for string-like types - varlen = "" if not self.dtype in (np.str_, np.bytes_) else ", var=True" - return "Dim(name={0!r}, domain={1!s}, tile={2!r}, dtype='{3!s}'{4}{5})" \ - .format(self.name, self.domain, self.tile, self.dtype, varlen, filters_str) - - def _repr_html_(self) -> str: - output = io.StringIO() - - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write(self._repr_html_row_only_()) - output.write("
NameDomainTileData TypeIs Var-LenFilters
") - - return output.getvalue() - - def _repr_html_row_only_(self) -> str: - output = io.StringIO() - - output.write("") - output.write(f"{self.name}") - output.write(f"{self.domain}") - output.write(f"{self.tile}") - output.write(f"{self.dtype}") - output.write(f"{self.dtype in (np.str_, np.bytes_)}") - output.write(f"{self.filters._repr_html_()}") - output.write("") - - return output.getvalue() - - - def __len__(self): - return self.size - - def __eq__(self, other): - if not isinstance(other, Dim): - return False - if (self.name != other.name or - self.domain != other.domain or - self.tile != other.tile or - self.dtype != other.dtype): - return False - return True - - def __array__(self, dtype=None, **kw): - if not self._integer_domain(): - raise TypeError("conversion to numpy ndarray only valid for integer dimension domains") - lb, ub = self.domain - return np.arange(int(lb), int(ub) + 1, - dtype=dtype if dtype else self.dtype) - - cdef tiledb_datatype_t _get_type(Dim self) except? TILEDB_CHAR: - cdef tiledb_datatype_t typ - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_dimension_get_type(ctx_ptr, self.ptr, &typ)) - return typ - - @property - def dtype(self): - """Numpy dtype representation of the dimension type. - - :rtype: numpy.dtype - - """ - return np.dtype(_numpy_dtype(self._get_type())) - - @property - def name(self): - """The dimension label string. - - Anonymous dimensions return a default string representation based on the dimension index. - - :rtype: str - - """ - cdef const char* name_ptr = NULL - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_dimension_get_name(ctx_ptr, self.ptr, &name_ptr)) - return name_ptr.decode('UTF-8', 'strict') - - @property - def isvar(self): - """True if the dimension is variable length - - :rtype: bool - :raises: :py:exc:`tiledb.TileDBError` - - """ - cdef unsigned int ncells = self._cell_val_num() - return ncells == TILEDB_VAR_NUM - - @property - def isanon(self): - """True if the dimension is anonymous - - :rtype: bool - - """ - name = self.name - return name == u"" or name.startswith("__dim") - - @property - def filters(self): - """FilterList of the TileDB dimension - - :rtype: tiledb.FilterList - :raises: :py:exc:`tiledb.TileDBError` - - """ - cdef tiledb_filter_list_t* filter_list_ptr = NULL - cdef int rc = TILEDB_OK - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_dimension_get_filter_list( - ctx_ptr, self.ptr, &filter_list_ptr)) - - return FilterList(self.ctx, _capsule=PyCapsule_New(filter_list_ptr, "fl", NULL)) - - cdef unsigned int _cell_val_num(Dim self) except? 0: - cdef unsigned int ncells = 0 - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_dimension_get_cell_val_num( - ctx_ptr, - self.ptr, - &ncells)) - return ncells - - cdef _integer_domain(self): - cdef tiledb_datatype_t typ = self._get_type() - return typ in ( - TILEDB_UINT8, - TILEDB_INT8, - TILEDB_UINT16, - TILEDB_INT16, - TILEDB_UINT32, - TILEDB_INT32, - TILEDB_UINT64, - TILEDB_INT64, - ) - - cdef _datetime_domain(self): - cdef tiledb_datatype_t typ = self._get_type() - return _tiledb_type_is_datetime(typ) - - cdef _shape(self): - domain = self.domain - if self._datetime_domain(): - return (_tiledb_datetime_extent(domain[0], domain[1]),) - else: - return ((domain[1].item() - - domain[0].item() + 1),) - - @property - def shape(self): - """The shape of the dimension given the dimension's domain. - - **Note**: The shape is only valid for integer and datetime dimension domains. - - :rtype: tuple(numpy scalar, numpy scalar) - :raises TypeError: floating point (inexact) domain - - """ - if not self._integer_domain() and not self._datetime_domain(): - raise TypeError("shape only valid for integer and datetime dimension domains") - return self._shape() - - @property - def size(self): - """The size of the dimension domain (number of cells along dimension). - - :rtype: int - :raises TypeError: floating point (inexact) domain - - """ - if not self._integer_domain(): - raise TypeError("size only valid for integer dimension domains") - return int(self._shape()[0]) - - @property - def tile(self): - """The tile extent of the dimension. - - :rtype: numpy scalar or np.timedelta64 - - """ - cdef const void* tile_ptr = NULL - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_dimension_get_tile_extent(ctx_ptr, self.ptr, &tile_ptr)) - if tile_ptr == NULL: - return None - cdef np.npy_intp shape[1] - shape[0] = 1 - cdef tiledb_datatype_t tiledb_type = self._get_type() - cdef int typeid = _numpy_typeid(tiledb_type) - assert(typeid != np.NPY_NOTYPE) - cdef np.ndarray tile_array =\ - np.PyArray_SimpleNewFromData(1, shape, typeid, tile_ptr) - - if _tiledb_type_is_datetime(tiledb_type): - # Coerce to np.int64 - tile_array.dtype = np.int64 - datetime_dtype = _tiledb_type_to_datetime(tiledb_type).dtype - date_unit = np.datetime_data(datetime_dtype)[0] - extent = None - if tile_array[0] == 0: - # undefined tiles should span the whole dimension domain - extent = int(self.shape[0]) - else: - extent = int(tile_array[0]) - return np.timedelta64(extent, date_unit) - else: - if tile_array[0] == 0: - # undefined tiles should span the whole dimension domain - return self.shape[0] - return tile_array[0] - - @property - def domain(self): - """The dimension (inclusive) domain. - - The dimension's domain is defined by a (lower bound, upper bound) tuple. - - :rtype: tuple(numpy scalar, numpy scalar) - - """ - if self.dtype == np.dtype('S'): - return None, None - cdef const void* domain_ptr = NULL - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_dimension_get_domain(ctx_ptr, - self.ptr, - &domain_ptr)) - cdef np.npy_intp shape[1] - shape[0] = 2 - cdef tiledb_datatype_t tiledb_type = self._get_type() - cdef int typeid = _numpy_typeid(tiledb_type) - assert (typeid != np.NPY_NOTYPE) - cdef np.ndarray domain_array = \ - np.PyArray_SimpleNewFromData(1, shape, typeid, domain_ptr) - - if _tiledb_type_is_datetime(tiledb_type): - domain_array.dtype = _tiledb_type_to_datetime(tiledb_type).dtype - - return domain_array[0], domain_array[1] - - -def clone_dim_with_name(Dim dim, name): - return Dim(name=name, domain=dim.domain, tile=dim.tile, dtype=dim.dtype, ctx=dim.ctx) - -cdef class Domain(object): - """Class representing the domain of a TileDB Array. - - :param *dims*: one or more tiledb.Dim objects up to the Domain's ndim - :raises TypeError: All dimensions must have the same dtype - :raises: :py:exc:`TileDBError` - :param tiledb.Ctx ctx: A TileDB Context - - """ - - def __cinit__(self): - self.ptr = NULL - - def __dealloc__(self): - if self.ptr != NULL: - tiledb_domain_free(&self.ptr) - - @staticmethod - cdef from_ptr(const tiledb_domain_t* ptr, ctx=None): - """Constructs an Domain class instance from a (non-null) tiledb_domain_t pointer""" - if not ctx: - ctx = default_ctx() - assert(ptr != NULL) - cdef Domain dom = Domain.__new__(Domain) - dom.ctx = ctx - dom.ptr = ptr - return dom - - cdef tiledb_datatype_t _get_type(Domain self) except? TILEDB_CHAR: - cdef tiledb_datatype_t typ - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_domain_get_type(ctx_ptr, self.ptr, &typ)) - return typ - - cdef _integer_domain(Domain self): - if not self._is_homogeneous(): - return False - cdef tiledb_datatype_t typ = self._get_type() - if typ == TILEDB_FLOAT32 or typ == TILEDB_FLOAT64: - return False - return True - - cdef _is_homogeneous(Domain self): - cdef np.dtype dtype0 = self.dim(0).dtype - return all(self.dim(i).dtype == dtype0 for i in range(1,self.ndim)) - - cdef _shape(Domain self): - return tuple(self.dim(i).shape[0] for i in range(self.ndim)) - - def __init__(self, *dims, ctx=None): - if not ctx: - ctx = default_ctx() - - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(ctx) - - # support passing a list of dims without splatting - if len(dims) == 1 and isinstance(dims[0], list): - dims = dims[0] - - cdef Py_ssize_t ndim = len(dims) - if ndim == 0: - raise TileDBError("Domain must have ndim >= 1") - - if (ndim > 1): - if all(dim.name == '__dim_0' for dim in dims): - # rename anonymous dimensions sequentially - dims = [clone_dim_with_name(dims[i], name=f'__dim_{i}') for i in range(ndim)] - elif any(dim.name.startswith('__dim_0') for dim in dims[1:]): - raise TileDBError("Mixed dimension naming: dimensions must be either all anonymous or all named.") - - cdef tiledb_domain_t* domain_ptr = NULL - cdef int rc = tiledb_domain_alloc(ctx_ptr, &domain_ptr) - if rc != TILEDB_OK: - check_error(ctx, rc) - assert(domain_ptr != NULL) - - cdef Dim dimension - for i in range(ndim): - if not isinstance(dims[i], Dim): - raise TypeError("Cannot create Domain with non-Dim value for 'dims' argument") - - dimension = dims[i] - rc = tiledb_domain_add_dimension( - ctx_ptr, domain_ptr, dimension.ptr) - if rc != TILEDB_OK: - tiledb_domain_free(&domain_ptr) - check_error(ctx, rc) - self.ctx = ctx - self.ptr = domain_ptr - - def __repr__(self): - dims = ",\n ".join( - [repr(self.dim(i)) for i in range(self.ndim)]) - return "Domain({0!s})".format(dims) - - def _repr_html_(self) -> str: - output = io.StringIO() - - output.write("") - - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - output.write("") - for i in range(self.ndim): - output.write(self.dim(i)._repr_html_row_only_()) - output.write("
NameDomainTileData TypeIs Var-lengthFilters
") - - return output.getvalue() - - def __len__(self): - """Returns the number of dimensions of the domain""" - return self.ndim - - def __iter__(self): - """Returns a generator object that iterates over the domain's dimension objects""" - return (self.dim(i) for i in range(self.ndim)) - - def __eq__(self, other): - """Returns true if Domain is equal to self. - - :rtype: bool - """ - if not isinstance(other, Domain): - return False - - cdef bint same_dtype = self._is_homogeneous() - - if (same_dtype and - self.shape != other.shape): - return False - - ndim = self.ndim - if (ndim != other.ndim): - return False - - for i in range(ndim): - if self.dim(i) != other.dim(i): - return False - return True - - @property - def ndim(self): - """The number of dimensions of the domain. - - :rtype: int - - """ - cdef unsigned int ndim = 0 - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_domain_get_ndim(ctx_ptr, self.ptr, &ndim)) - return ndim - - @property - def dtype(self): - """The numpy dtype of the domain's dimension type. - - :rtype: numpy.dtype - - """ - cdef tiledb_datatype_t typ = self._get_type() - return np.dtype(_numpy_dtype(typ)) - - @property - def shape(self): - """The domain's shape, valid only for integer domains. - - :rtype: tuple - :raises TypeError: floating point (inexact) domain - - """ - if not self._integer_domain(): - raise TypeError("shape valid only for integer domains") - return self._shape() - - @property - def size(self): - """The domain's size (number of cells), valid only for integer domains. - - :rtype: int - :raises TypeError: floating point (inexact) domain - - """ - if not self._integer_domain(): - raise TypeError("shape valid only for integer domains") - return np.product(self._shape()) - - @property - def homogeneous(self): - """Returns True if the domain's dimension types are homogeneous.""" - return self._is_homogeneous() - - def dim(self, dim_id): - """Returns a Dim object from the domain given the dimension's index or name. - - :param dim_d: dimension index (int) or name (str) - :raises: :py:exc:`tiledb.TileDBError` - - """ - cdef tiledb_dimension_t* dim_ptr = NULL - cdef bytes uname - cdef const char* name_ptr = NULL - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - - if isinstance(dim_id, (str, unicode)): - uname = ustring(dim_id).encode('UTF-8') - name_ptr = uname - check_error(self.ctx, - tiledb_domain_get_dimension_from_name( - ctx_ptr, self.ptr, name_ptr, &dim_ptr)) - elif isinstance(dim_id, int): - check_error(self.ctx, - tiledb_domain_get_dimension_from_index( - ctx_ptr, self.ptr, dim_id, &dim_ptr)) - else: - raise ValueError("Unsupported dim identifier: '{}' (expected int or str)".format( - safe_repr(dim_id) - )) - - assert(dim_ptr != NULL) - return Dim.from_ptr(dim_ptr, self.ctx) - - def has_dim(self, name): - """ - Returns true if the Domain has a Dimension with the given name - - :param name: name of Dimension - :rtype: bool - :return: - """ - cdef: - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - cdef tiledb_domain_t* dom_ptr = self.ptr - int32_t has_dim = 0 - int32_t rc = TILEDB_OK - bytes bname = name.encode("UTF-8") - - rc = tiledb_domain_has_dimension( - ctx_ptr, - dom_ptr, - bname, - &has_dim - ) - if rc != TILEDB_OK: - _raise_ctx_err(ctx_ptr, rc) - return bool(has_dim) - - - def dump(self): - """Dumps a string representation of the domain object to standard output (STDOUT)""" - cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) - check_error(self.ctx, - tiledb_domain_dump(ctx_ptr, self.ptr, stdout)) - print("\n") - return - def index_as_tuple(idx): """Forces scalar index objects to a tuple representation""" if isinstance(idx, tuple): @@ -1475,7 +804,7 @@ def replace_ellipsis(ndim: int, idx: tuple): return idx -def replace_scalars_slice(dom: Domain, idx: tuple): +def replace_scalars_slice(dom, idx: tuple): """Replace scalar indices with slice objects""" new_idx, drop_axes = [], [] for i in range(dom.ndim): @@ -1497,7 +826,7 @@ def replace_scalars_slice(dom: Domain, idx: tuple): return tuple(new_idx), tuple(drop_axes) -def index_domain_subarray(array: Array, dom: Domain, idx: tuple): +def index_domain_subarray(array: Array, dom, idx: tuple): """ Return a numpy array representation of the tiledb subarray buffer for a given domain and tuple of index slices @@ -1682,7 +1011,8 @@ cdef class ArraySchema(object): if not isinstance(domain, Domain): raise TypeError("'domain' must be an instance of Domain (domain is: '{}')".format(domain)) - cdef tiledb_domain_t* domain_ptr = ( domain).ptr + cdef tiledb_domain_t* domain_ptr = PyCapsule_GetPointer( + domain.__capsule__(), "dom") cdef tiledb_domain_t* dom_with_coords_filters_ptr = NULL; cdef unsigned int ndim = 0 @@ -2038,7 +1368,7 @@ cdef class ArraySchema(object): cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) check_error(self.ctx, tiledb_array_schema_get_domain(ctx_ptr, self.ptr, &dom)) - return Domain.from_ptr(dom, self.ctx) + return Domain(self.ctx, _capsule=PyCapsule_New(dom, "dom", NULL)) @property def nattr(self): @@ -2867,7 +2197,7 @@ cdef class Array(object): """ cdef list results = list() - cdef Domain dom = self.schema.domain + dom = self.schema.domain cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx) cdef tiledb_array_t* array_ptr = self.ptr @@ -3702,7 +3032,7 @@ cdef class DenseArrayImpl(Array): if not self.isopen or self.mode != 'w': raise TileDBError("DenseArray is not opened for writing") - cdef Domain domain = self.domain + domain = self.domain cdef tuple idx = replace_ellipsis(domain.ndim, index_as_tuple(selection)) idx,_drop = replace_scalars_slice(domain, idx) cdef object subarray = index_domain_subarray(self, domain, idx) @@ -3981,7 +3311,7 @@ cdef class DenseArrayImpl(Array): cell_layout = TILEDB_COL_MAJOR cdef ArraySchema schema = self.schema - cdef Domain domain = schema.domain + domain = schema.domain idx = tuple(slice(None) for _ in range(domain.ndim)) subarray = index_domain_subarray(self, domain, idx) diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py index b8f14ca980..6105ebaa15 100644 --- a/tiledb/tests/cc/test_cc.py +++ b/tiledb/tests/cc/test_cc.py @@ -66,7 +66,7 @@ def make_range(dtype): if np.issubdtype(dtype, np.number): return np.array([0, 100.123]).astype(dtype), np.array([1]).astype(dtype) elif np.issubdtype(dtype, str) or np.issubdtype(dtype, bytes): - return np.array(["a", "z"]).astype(dtype), None + return None, None else: raise TypeError(f"Unsupported dtype '{dtype}'") @@ -88,9 +88,8 @@ def test_dimension(dtype_str): if dtype_str == "S": tiledb_datatype = lt.DataType.STRING_ASCII - extent = np.array([], dtype=dtype) # null extent - dim = lt.Dimension.create(ctx, "foo", tiledb_datatype, range, extent) + dim = lt.Dimension(ctx, "foo", tiledb_datatype, range, extent) # print(dim) @@ -189,13 +188,11 @@ def test_array(): def test_domain(): ctx = lt.Context() dom = lt.Domain(ctx) - dim = lt.Dimension.create( - ctx, "foo", lt.DataType.INT32, np.int32([0, 9]), np.int32([9]) - ) - dom.add_dim(dim) + dim = lt.Dimension(ctx, "foo", lt.DataType.INT32, np.int32([0, 9]), np.int32([9])) + dom._add_dim(dim) - assert dom.dtype == lt.DataType.INT32 - assert dom.ncell == 10 + assert dom._tiledb_dtype == lt.DataType.INT32 + assert dom._ncell == 10 # TODO assert dom.dimension("foo").domain() == ??? np.array? @@ -263,24 +260,20 @@ def test_schema(): # TODO assert schema.offsets_filter_list == dom = lt.Domain(ctx) - dim = lt.Dimension.create( - ctx, "foo", lt.DataType.INT32, np.int32([0, 9]), np.int32([9]) - ) - dom.add_dim(dim) + dim = lt.Dimension(ctx, "foo", lt.DataType.INT32, np.int32([0, 9]), np.int32([9])) + dom._add_dim(dim) schema.domain = dom # TODO dom and dimension need full equality check - assert schema.domain.dim("foo").name == dim.name + assert schema.domain._dim("foo")._name == dim._name def test_query_string(): def create_schema(): schema = lt.ArraySchema(ctx, lt.ArrayType.SPARSE) dom = lt.Domain(ctx) - dim = lt.Dimension.create( - ctx, "foo", lt.DataType.STRING_ASCII, np.uint8([]), np.uint8([]) - ) - dom.add_dim(dim) + dim = lt.Dimension(ctx, "foo", lt.DataType.STRING_ASCII, None, None) + dom._add_dim(dim) schema.domain = dom return schema @@ -303,10 +296,10 @@ def create_schema(): ctx = lt.Context() schema = lt.ArraySchema(ctx, lt.ArrayType.SPARSE) dom = lt.Domain(ctx) - dim = lt.Dimension.create( + dim = lt.Dimension( ctx, "x", lt.DataType.INT32, np.int32([0, 9]), np.int32([10]) ) - dom.add_dim(dim) + dom._add_dim(dim) attr = lt.Attribute(ctx, "a", lt.DataType.INT32) schema.add_attr(attr) @@ -363,10 +356,10 @@ def create_schema(): ctx = lt.Context() schema = lt.ArraySchema(ctx, lt.ArrayType.DENSE) dom = lt.Domain(ctx) - dim = lt.Dimension.create( + dim = lt.Dimension( ctx, "x", lt.DataType.UINT64, np.uint64([0, 9]), np.uint64([10]) ) - dom.add_dim(dim) + dom._add_dim(dim) attr = lt.Attribute(ctx, "a", lt.DataType.FLOAT32) schema.add_attr(attr) diff --git a/tiledb/tests/test_attribute.py b/tiledb/tests/test_attribute.py index 68693eb143..4342b97bb5 100644 --- a/tiledb/tests/test_attribute.py +++ b/tiledb/tests/test_attribute.py @@ -15,7 +15,6 @@ def test_minimal_attribute(self): self.assertTrue(attr.isanon) self.assertEqual(attr.name, "") self.assertEqual(attr.dtype, np.float_) - # self.assertEqual(attr.compressor, (None, -1)) self.assertFalse(attr.isvar) self.assertFalse(attr.isnullable) @@ -32,9 +31,6 @@ def test_attribute(self, capfd): assert attr.name == "foo" assert attr.dtype == np.float64, "default attribute type is float64" - # compressor, level = attr.compressor - # self.assertEqual(compressor, None, "default to no compression") - # self.assertEqual(level, -1, "default compression level when none is specified") @pytest.mark.parametrize( "dtype, fill", @@ -94,7 +90,6 @@ def test_ncell_attribute(self): def test_ncell_bytes_attribute(self): dtype = np.dtype((np.bytes_, 10)) attr = tiledb.Attr("foo", dtype=dtype) - self.assertEqual(attr.dtype, dtype) self.assertEqual(attr.ncells, 10) diff --git a/tiledb/tests/test_dimension.py b/tiledb/tests/test_dimension.py new file mode 100644 index 0000000000..6ec9794d46 --- /dev/null +++ b/tiledb/tests/test_dimension.py @@ -0,0 +1,112 @@ +import numpy as np +import pytest +import unittest +import xml.etree.ElementTree + +import tiledb + + +class DimensionTest(unittest.TestCase): + def test_minimal_dimension(self): + dim = tiledb.Dim(domain=(0, 4), tile=5) + self.assertEqual(dim.name, "__dim_0", "automatic dimension name is incorrect") + self.assertEqual(dim.shape, (5,)) + self.assertEqual(dim.tile, 5) + + def test_dimension(self): + dim = tiledb.Dim(name="d1", domain=(0, 3), tile=2) + self.assertEqual(dim.name, "d1") + self.assertEqual(dim.shape, (4,)) + self.assertEqual(dim.tile, 2) + try: + assert xml.etree.ElementTree.fromstring(dim._repr_html_()) is not None + except: + pytest.fail(f"Could not parse dim._repr_html_(). Saw {dim._repr_html_()}") + + def test_dimension_filter(self): + filters = [tiledb.GzipFilter(2)] + dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filters) + self.assertEqual(dim.filters, filters) + + filter_list = tiledb.FilterList(filters) + dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filter_list) + self.assertEqual(dim.filters, filter_list) + + with self.assertRaises(TypeError): + tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=1) + + def test_datetime_dimension(self): + # Regular usage + dim = tiledb.Dim( + name="d1", + domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), + tile=np.timedelta64(20, "D"), + dtype=np.datetime64("", "D"), + ) + self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) + self.assertEqual(dim.tile, np.timedelta64(20, "D")) + self.assertNotEqual(dim.tile, np.timedelta64(21, "D")) + self.assertNotEqual(dim.tile, np.timedelta64(20, "W")) # Sanity check unit + self.assertTupleEqual( + dim.domain, (np.datetime64("2010-01-01"), np.datetime64("2020-01-01")) + ) + self.assertEqual(dim.shape, (3653,)) + + # No tile extent specified: this is not an error in 2.2 + if tiledb.libtiledb.version() < (2, 2): + with self.assertRaises(tiledb.TileDBError): + tiledb.Dim( + name="d1", + domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), + dtype=np.datetime64("", "D"), + ) + + # Integer tile extent is ok + dim = tiledb.Dim( + name="d1", + domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), + tile=20, + dtype=np.datetime64("", "D"), + ) + self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) + self.assertEqual(dim.tile, np.timedelta64(20, "D")) + + # Year resolution + dim = tiledb.Dim( + name="d1", + domain=(np.datetime64("2010"), np.datetime64("2020")), + tile=5, + dtype=np.datetime64("", "Y"), + ) + self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "Y"))) + self.assertEqual(dim.tile, np.timedelta64(5, "Y")) + self.assertTupleEqual( + dim.domain, (np.datetime64("2010", "Y"), np.datetime64("2020", "Y")) + ) + + # End domain promoted to day resolution + dim = tiledb.Dim( + name="d1", + domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), + tile=2, + dtype=np.datetime64("", "D"), + ) + self.assertEqual(dim.tile, np.timedelta64(2, "D")) + self.assertTupleEqual( + dim.domain, + (np.datetime64("2010-01-01", "D"), np.datetime64("2020-01-01", "D")), + ) + + # Domain values can't be integral + with self.assertRaises(TypeError): + dim = tiledb.Dim( + name="d1", domain=(-10, 10), tile=2, dtype=np.datetime64("", "D") + ) + + def test_shape(self): + dim = tiledb.Dim(name="", dtype="|S0", var=True) + with self.assertRaisesRegex( + TypeError, + "shape only valid for integer and datetime dimension domains", + ): + dim.shape diff --git a/tiledb/tests/test_domain.py b/tiledb/tests/test_domain.py new file mode 100644 index 0000000000..e0f5cb75b0 --- /dev/null +++ b/tiledb/tests/test_domain.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest +import xml.etree.ElementTree + +import tiledb +from tiledb.tests.common import assert_captured, DiskTestCase + + +class DomainTest(DiskTestCase): + def test_domain(self, capfd): + dims = [ + tiledb.Dim("d1", (1, 4), 2, dtype="u8"), + tiledb.Dim("d2", (1, 4), 2, dtype="u8"), + ] + dom = tiledb.Domain(*dims) + + # check that dumping works + dom.dump() + assert_captured(capfd, "Name: d1") + + self.assertEqual(dom.ndim, 2) + self.assertEqual(dom.dtype, np.dtype("uint64")) + self.assertEqual(dom.shape, (4, 4)) + + # check that we can iterate over the dimensions + dim_names = [dim.name for dim in dom] + self.assertEqual(["d1", "d2"], dim_names) + + # check that we can access dim by name + dim_d1 = dom.dim("d1") + self.assertEqual(dim_d1, dom.dim(0)) + + # check that we can construct directly from a List[Dim] + dom2 = tiledb.Domain(dims) + self.assertEqual(dom, dom2) + + try: + assert xml.etree.ElementTree.fromstring(dom._repr_html_()) is not None + except: + pytest.fail(f"Could not parse dom._repr_html_(). Saw {dom._repr_html_()}") + + def test_datetime_domain(self): + dim = tiledb.Dim( + name="d1", + domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), + tile=np.timedelta64(20, "D"), + dtype=np.datetime64("", "D"), + ) + dom = tiledb.Domain(dim) + self.assertEqual(dom.dtype, np.datetime64("", "D")) + + def test_domain_mixed_names_error(self): + with self.assertRaises(tiledb.TileDBError): + tiledb.Domain( + tiledb.Dim("d1", (1, 4), 2, dtype="u8"), + tiledb.Dim("__dim_0", (1, 4), 2, dtype="u8"), + ) + + def test_ascii_domain(self, capfd): + path = self.path("test_ascii_domain") + + dim = tiledb.Dim(name="d", dtype="ascii") + assert dim.dtype == np.bytes_ + + dom = tiledb.Domain(dim) + dom.dump() + assert_captured(capfd, "Type: STRING_ASCII") + + att = tiledb.Attr(name="a", dtype=np.int64) + schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) + tiledb.SparseArray.create(path, schema) + + ascii_coords = ["a", "b", "c", "ABC"] + unicode_coords = ["±", "×", "÷", "√"] + data = [1, 2, 3, 4] + + with tiledb.open(path, "w") as A: + with self.assertRaises(tiledb.TileDBError): + A[unicode_coords] = data + A[ascii_coords] = data diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index 1204161269..54c723b510 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -120,186 +120,6 @@ def test_stats_include_python_json(self): assert "counters" in json_stats -class DimensionTest(unittest.TestCase): - def test_minimal_dimension(self): - dim = tiledb.Dim(domain=(0, 4), tile=5) - self.assertEqual(dim.name, "__dim_0", "automatic dimension name is incorrect") - self.assertEqual(dim.shape, (5,)) - self.assertEqual(dim.tile, 5) - - def test_dimension(self): - dim = tiledb.Dim(name="d1", domain=(0, 3), tile=2) - self.assertEqual(dim.name, "d1") - self.assertEqual(dim.shape, (4,)) - self.assertEqual(dim.tile, 2) - try: - assert xml.etree.ElementTree.fromstring(dim._repr_html_()) is not None - except: - pytest.fail(f"Could not parse dim._repr_html_(). Saw {dim._repr_html_()}") - - def test_dimension_filter(self): - filters = [tiledb.GzipFilter(2)] - dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filters) - self.assertEqual(dim.filters, filters) - - filter_list = tiledb.FilterList(filters) - dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filter_list) - self.assertEqual(dim.filters, filter_list) - - with self.assertRaises(TypeError): - tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=1) - - def test_datetime_dimension(self): - # Regular usage - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - tile=np.timedelta64(20, "D"), - dtype=np.datetime64("", "D"), - ) - self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) - self.assertEqual(dim.tile, np.timedelta64(20, "D")) - self.assertNotEqual(dim.tile, np.timedelta64(21, "D")) - self.assertNotEqual(dim.tile, np.timedelta64(20, "W")) # Sanity check unit - self.assertTupleEqual( - dim.domain, (np.datetime64("2010-01-01"), np.datetime64("2020-01-01")) - ) - self.assertEqual(dim.shape, (3653,)) - - # No tile extent specified: this is not an error in 2.2 - if tiledb.libtiledb.version() < (2, 2): - with self.assertRaises(tiledb.TileDBError): - tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - dtype=np.datetime64("", "D"), - ) - - # Integer tile extent is ok - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - tile=20, - dtype=np.datetime64("", "D"), - ) - self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) - self.assertEqual(dim.tile, np.timedelta64(20, "D")) - - # Year resolution - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010"), np.datetime64("2020")), - tile=5, - dtype=np.datetime64("", "Y"), - ) - self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "Y"))) - self.assertEqual(dim.tile, np.timedelta64(5, "Y")) - self.assertTupleEqual( - dim.domain, (np.datetime64("2010", "Y"), np.datetime64("2020", "Y")) - ) - - # End domain promoted to day resolution - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), - tile=2, - dtype=np.datetime64("", "D"), - ) - self.assertEqual(dim.tile, np.timedelta64(2, "D")) - self.assertTupleEqual( - dim.domain, - (np.datetime64("2010-01-01", "D"), np.datetime64("2020-01-01", "D")), - ) - - # Domain values can't be integral - with self.assertRaises(TypeError): - dim = tiledb.Dim( - name="d1", domain=(-10, 10), tile=2, dtype=np.datetime64("", "D") - ) - - def test_shape(self): - dim = tiledb.Dim(name="", dtype="|S0", var=True) - with self.assertRaisesRegex( - TypeError, - "shape only valid for integer and datetime dimension domains", - ): - dim.shape - - -class DomainTest(DiskTestCase): - def test_domain(self, capfd): - dims = [ - tiledb.Dim("d1", (1, 4), 2, dtype="u8"), - tiledb.Dim("d2", (1, 4), 2, dtype="u8"), - ] - dom = tiledb.Domain(*dims) - - # check that dumping works - dom.dump() - assert_captured(capfd, "Name: d1") - - self.assertEqual(dom.ndim, 2) - self.assertEqual(dom.dtype, np.dtype("uint64")) - self.assertEqual(dom.shape, (4, 4)) - - # check that we can iterate over the dimensions - dim_names = [dim.name for dim in dom] - self.assertEqual(["d1", "d2"], dim_names) - - # check that we can access dim by name - dim_d1 = dom.dim("d1") - self.assertEqual(dim_d1, dom.dim(0)) - - # check that we can construct directly from a List[Dim] - dom2 = tiledb.Domain(dims) - self.assertEqual(dom, dom2) - - try: - assert xml.etree.ElementTree.fromstring(dom._repr_html_()) is not None - except: - pytest.fail(f"Could not parse dom._repr_html_(). Saw {dom._repr_html_()}") - - def test_datetime_domain(self): - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - tile=np.timedelta64(20, "D"), - dtype=np.datetime64("", "D"), - ) - dom = tiledb.Domain(dim) - self.assertEqual(dom.dtype, np.datetime64("", "D")) - - def test_domain_mixed_names_error(self): - with self.assertRaises(tiledb.TileDBError): - tiledb.Domain( - tiledb.Dim("d1", (1, 4), 2, dtype="u8"), - tiledb.Dim("__dim_0", (1, 4), 2, dtype="u8"), - ) - - def test_ascii_domain(self, capfd): - path = self.path("test_ascii_domain") - - dim = tiledb.Dim(name="d", dtype="ascii") - assert dim.dtype == np.bytes_ - - dom = tiledb.Domain(dim) - dom.dump() - assert_captured(capfd, "Type: STRING_ASCII") - - att = tiledb.Attr(name="a", dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.SparseArray.create(path, schema) - - ascii_coords = ["a", "b", "c", "ABC"] - unicode_coords = ["±", "×", "÷", "√"] - data = [1, 2, 3, 4] - - with tiledb.open(path, "w") as A: - with self.assertRaises(tiledb.TileDBError): - A[unicode_coords] = data - A[ascii_coords] = data - - class ArraySchemaTest(DiskTestCase): def test_schema_basic(self): dom = tiledb.Domain( diff --git a/tiledb/tests/test_metadata.py b/tiledb/tests/test_metadata.py index b4896e3d35..567bf7b3ef 100644 --- a/tiledb/tests/test_metadata.py +++ b/tiledb/tests/test_metadata.py @@ -5,6 +5,7 @@ import tiledb import numpy as np import pytest +import hypothesis as hp from hypothesis import given, settings, strategies as st from hypothesis.extra import numpy as st_np @@ -151,6 +152,8 @@ def test_errors(self): @given(st_metadata) @settings(deadline=None) def test_basic(self, test_vals): + start = time.time() + path = self.path() with tiledb.from_numpy(path, np.ones((5,), np.float64)): pass @@ -167,6 +170,8 @@ def test_basic(self, test_vals): test_vals["bigblob"] = blob A.meta["bigblob"] = blob + hp.note(tiledb.stats_dump(print_out=False)) + with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) @@ -186,6 +191,16 @@ def test_basic(self, test_vals): with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) + duration = time.time() - start + hp.note(f"!!! test_basic duration: {duration}") + if duration > 2: + # Hypothesis setup is (maybe) causing deadline exceeded errors + # https://github.com/TileDB-Inc/TileDB-Py/issues/1194 + # Set deadline=None and use internal timing instead. + pytest.fail( + f"!!! test_basic function body duration exceeded 2s: {duration}" + ) + @given(st_metadata, st_ndarray) @settings(deadline=None) def test_numpy(self, test_vals, ndarray): diff --git a/tiledb/util.py b/tiledb/util.py index ff091478e0..3a7ad3d0ca 100644 --- a/tiledb/util.py +++ b/tiledb/util.py @@ -227,6 +227,19 @@ def tiledb_type_to_datetime(tiledb_type: lt.DataType): return tdb_type +def tiledb_type_is_integer(tiledb_type: lt.DataType): + return tiledb_type in ( + lt.DataType.UINT8, + lt.DataType.INT8, + lt.DataType.UINT16, + lt.DataType.INT16, + lt.DataType.UINT32, + lt.DataType.INT32, + lt.DataType.UINT64, + lt.DataType.INT64, + ) + + def numpy_dtype(tiledb_dtype: lt.DataType, cell_size: int = 1) -> np.dtype: """Return a numpy type given a tiledb_datatype_t enum value.""" cell_val_num = cell_size