diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index b83998f7..2d49a25d 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -42,13 +42,9 @@ from blosc2.info import InfoReporter from blosc2.ndarray import ( NUMPY_GE_2_0, - _check_allowed_dtypes, get_chunks_idx, get_intersecting_chunks, - local_ufunc_map, process_key, - ufunc_map, - ufunc_map_1param, ) from .shape_utils import constructors, elementwise_funcs, infer_shape, linalg_attrs, linalg_funcs, reducers @@ -116,9 +112,9 @@ def ne_evaluate(expression, local_dict=None, **kwargs): res = eval(expression, safe_blosc2_globals, local_dict) if "out" in kwargs: out = kwargs.pop("out") - out[:] = res[()] if isinstance(res, blosc2.LazyArray) else res + out[:] = res # will handle calc/decomp if res is lazyarray return out - return res[()] if isinstance(res, blosc2.LazyArray) else res + return res[()] if isinstance(res, blosc2.Operand) else res # Define empty ndindex tuple for function defaults @@ -228,7 +224,7 @@ class LazyArrayEnum(Enum): UDF = 1 -class LazyArray(ABC): +class LazyArray(ABC, blosc2.Operand): @abstractmethod def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray: """ @@ -408,68 +404,6 @@ def save(self, **kwargs: Any) -> None: """ pass - @property - @abstractmethod - def dtype(self) -> np.dtype: - """ - Get the data type of the :ref:`LazyArray`. - - Returns - ------- - out: np.dtype - The data type of the :ref:`LazyArray`. - """ - pass - - @property - @abstractmethod - def shape(self) -> tuple[int]: - """ - Get the shape of the :ref:`LazyArray`. - - Returns - ------- - out: tuple - The shape of the :ref:`LazyArray`. - """ - pass - - @property - @abstractmethod - def ndim(self) -> int: - """ - Get the number of dimensions of the :ref:`LazyArray`. - - Returns - ------- - out: int - The number of dimensions of the :ref:`LazyArray`. - """ - pass - - @property - @abstractmethod - def info(self) -> InfoReporter: - """ - Get information about the :ref:`LazyArray`. - - Returns - ------- - out: InfoReporter - A printable class with information about the :ref:`LazyArray`. - """ - pass - - # Provide minimal __array_interface__ to allow NumPy to work with this object - @property - def __array_interface__(self): - return { - "shape": self.shape, - "typestr": self.dtype.str, - "data": self[()], - "version": 3, - } - # Provide a way to serialize the LazyArray def to_cframe(self) -> bytes: """ @@ -482,11 +416,6 @@ def to_cframe(self) -> bytes: """ return self.compute().to_cframe() - def __bool__(self) -> bool: - if math.prod(self.shape) != 1: - raise ValueError(f"The truth value of a LazyArray of shape {self.shape} is ambiguous.") - return self[()].__bool__() - def convert_inputs(inputs): if not inputs or len(inputs) == 0: @@ -2328,7 +2257,7 @@ def __init__(self, new_op): # noqa: C901 "minimum", ): if np.isscalar(value1) and np.isscalar(value2): - self.expression = f"{op}(o0, o1)" + self.expression = f"{op}({value1}, {value2})" elif np.isscalar(value2): self.operands = {"o0": value1} self.expression = f"{op}(o0, {value2})" @@ -2339,6 +2268,15 @@ def __init__(self, new_op): # noqa: C901 self.operands = {"o0": value1, "o1": value2} self.expression = f"{op}(o0, o1)" return + elif isinstance(value1, LazyExpr) or isinstance(value2, LazyExpr): + if isinstance(value1, LazyExpr): + newexpr = value1.update_expr(new_op) + else: + newexpr = value2.update_expr(new_op) + self.expression = newexpr.expression + self.operands = newexpr.operands + self._dtype = newexpr.dtype + return self._dtype = dtype_ if np.isscalar(value1) and np.isscalar(value2): @@ -2359,16 +2297,6 @@ def __init__(self, new_op): # noqa: C901 if value1 is value2: self.operands = {"o0": value1} self.expression = f"(o0 {op} o0)" - elif isinstance(value1, LazyExpr) or isinstance(value2, LazyExpr): - if isinstance(value1, LazyExpr): - self.expression = value1.expression - self.operands = {"o0": value2} - else: - self.expression = value2.expression - self.operands = {"o0": value1} - newexpr = self.update_expr(new_op) - self.expression = newexpr.expression - self.operands = newexpr.operands else: # This is the very first time that a LazyExpr is formed from two operands # that are not LazyExpr themselves @@ -2404,6 +2332,7 @@ def update_expr(self, new_op): # noqa: C901 # One of the two operands are LazyExpr instances try: value1, op, value2 = new_op + dtype_ = check_dtype(op, value1, value2) # conserve dtype # The new expression and operands expression = None new_operands = {} @@ -2430,28 +2359,29 @@ def update_expr(self, new_op): # noqa: C901 new_operands, dup_op = fuse_operands(value1.operands, value2.operands) # Take expression 2 and rebase the operands while removing duplicates new_expr = fuse_expressions(value2.expression, len(value1.operands), dup_op) - expression = f"({self.expression} {op} {new_expr})" + expression = f"({value1.expression} {op} {new_expr})" + self.operands = value1.operands elif isinstance(value1, LazyExpr): if op == "~": - expression = f"({op}{self.expression})" + expression = f"({op}{value1.expression})" elif np.isscalar(value2): - expression = f"({self.expression} {op} {value2})" + expression = f"({value1.expression} {op} {value2})" elif hasattr(value2, "shape") and value2.shape == (): - expression = f"({self.expression} {op} {value2[()]})" + expression = f"({value1.expression} {op} {value2[()]})" else: operand_to_key = {id(v): k for k, v in value1.operands.items()} try: op_name = operand_to_key[id(value2)] except KeyError: - op_name = f"o{len(self.operands)}" + op_name = f"o{len(value1.operands)}" new_operands = {op_name: value2} - expression = f"({self.expression} {op} {op_name})" + expression = f"({value1.expression} {op} {op_name})" self.operands = value1.operands else: if np.isscalar(value1): - expression = f"({value1} {op} {self.expression})" + expression = f"({value1} {op} {value2.expression})" elif hasattr(value1, "shape") and value1.shape == (): - expression = f"({value1[()]} {op} {self.expression})" + expression = f"({value1[()]} {op} {value2.expression})" else: operand_to_key = {id(v): k for k, v in value2.operands.items()} try: @@ -2460,13 +2390,15 @@ def update_expr(self, new_op): # noqa: C901 op_name = f"o{len(value2.operands)}" new_operands = {op_name: value1} if op == "[]": # syntactic sugar for slicing - expression = f"({op_name}[{self.expression}])" + expression = f"({op_name}[{value2.expression}])" else: - expression = f"({op_name} {op} {self.expression})" + expression = f"({op_name} {op} {value2.expression})" self.operands = value2.operands # Return a new expression operands = self.operands | new_operands - return self._new_expr(expression, operands, guess=False, out=None, where=None) + expr = self._new_expr(expression, operands, guess=False, out=None, where=None) + expr._dtype = dtype_ # override dtype with preserved dtype + return expr finally: blosc2._disable_overloaded_equal = prev_flag @@ -2556,107 +2488,6 @@ def blocks(self): self._chunks, self._blocks = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype) return self._blocks - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # Handle operations at the array level - if method != "__call__": - return NotImplemented - - if ufunc in local_ufunc_map: - return local_ufunc_map[ufunc](*inputs) - - if ufunc in ufunc_map: - value = inputs[0] if inputs[1] is self else inputs[1] - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(value, ufunc_map[ufunc], self)) - - if ufunc in ufunc_map_1param: - value = inputs[0] - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(value, ufunc_map_1param[ufunc], None)) - - return NotImplemented - - def __neg__(self): - return self.update_expr(new_op=(0, "-", self)) - - def __add__(self, value): - return self.update_expr(new_op=(self, "+", value)) - - def __iadd__(self, other): - return self.update_expr(new_op=(self, "+", other)) - - def __radd__(self, value): - return self.update_expr(new_op=(value, "+", self)) - - def __sub__(self, value): - return self.update_expr(new_op=(self, "-", value)) - - def __isub__(self, value): - return self.update_expr(new_op=(self, "-", value)) - - def __rsub__(self, value): - return self.update_expr(new_op=(value, "-", self)) - - def __mul__(self, value): - return self.update_expr(new_op=(self, "*", value)) - - def __imul__(self, value): - return self.update_expr(new_op=(self, "*", value)) - - def __rmul__(self, value): - return self.update_expr(new_op=(value, "*", self)) - - def __truediv__(self, value): - return self.update_expr(new_op=(self, "/", value)) - - def __itruediv__(self, value): - return self.update_expr(new_op=(self, "/", value)) - - def __rtruediv__(self, value): - return self.update_expr(new_op=(value, "/", self)) - - def __and__(self, value): - return self.update_expr(new_op=(self, "&", value)) - - def __rand__(self, value): - return self.update_expr(new_op=(value, "&", self)) - - def __or__(self, value): - return self.update_expr(new_op=(self, "|", value)) - - def __ror__(self, value): - return self.update_expr(new_op=(value, "|", self)) - - def __invert__(self): - return self.update_expr(new_op=(self, "~", None)) - - def __pow__(self, value): - return self.update_expr(new_op=(self, "**", value)) - - def __rpow__(self, value): - return self.update_expr(new_op=(value, "**", self)) - - def __ipow__(self, value): - return self.update_expr(new_op=(self, "**", value)) - - def __lt__(self, value): - return self.update_expr(new_op=(self, "<", value)) - - def __le__(self, value): - return self.update_expr(new_op=(self, "<=", value)) - - def __eq__(self, value): - return self.update_expr(new_op=(self, "==", value)) - - def __ne__(self, value): - return self.update_expr(new_op=(self, "!=", value)) - - def __gt__(self, value): - return self.update_expr(new_op=(self, ">", value)) - - def __ge__(self, value): - return self.update_expr(new_op=(self, ">=", value)) - def where(self, value1=None, value2=None): """ Select value1 or value2 values based on the condition of the current expression. diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index 012a2ba5..23ef1168 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -12,6 +12,7 @@ import inspect import math import tempfile +from abc import abstractmethod from collections import OrderedDict, namedtuple from functools import reduce from itertools import product @@ -3038,6 +3039,58 @@ def __array_interface__(self): "version": 3, } + @property + @abstractmethod + def dtype(self) -> np.dtype: + """ + Get the data type of the :ref:`Operand`. + + Returns + ------- + out: np.dtype + The data type of the :ref:`Operand`. + """ + pass + + @property + @abstractmethod + def shape(self) -> tuple[int]: + """ + Get the shape of the :ref:`Operand`. + + Returns + ------- + out: tuple + The shape of the :ref:`Operand`. + """ + pass + + @property + @abstractmethod + def ndim(self) -> int: + """ + Get the number of dimensions of the :ref:`Operand`. + + Returns + ------- + out: int + The number of dimensions of the :ref:`Operand`. + """ + pass + + @property + @abstractmethod + def info(self) -> InfoReporter: + """ + Get information about the :ref:`Operand`. + + Returns + ------- + out: InfoReporter + A printable class with information about the :ref:`Operand`. + """ + pass + @property def device(self): "Hardware device the array data resides on. Always equal to 'cpu'." @@ -5887,7 +5940,7 @@ def __init__(self, ndarr: NDArray, field: str): self.chunks = ndarr.chunks self.blocks = ndarr.blocks self.field = field - self.dtype = ndarr.dtype.fields[field][0] + self._dtype = ndarr.dtype.fields[field][0] self.offset = ndarr.dtype.fields[field][1] def __repr__(self): @@ -5905,6 +5958,11 @@ def shape(self) -> tuple[int]: """The shape of the associated :ref:`NDArray`.""" return self.ndarr.shape + @property + def dtype(self) -> np.dtype: + """The dtype of the field of associated :ref:`NDArray`.""" + return self._dtype + @property def schunk(self) -> blosc2.SChunk: """The associated :ref:`SChunk `.""" diff --git a/src/blosc2/proxy.py b/src/blosc2/proxy.py index 53e6e11e..6a527a9c 100644 --- a/src/blosc2/proxy.py +++ b/src/blosc2/proxy.py @@ -522,8 +522,32 @@ class ProxyNDField(blosc2.Operand): def __init__(self, proxy: Proxy, field: str): self.proxy = proxy self.field = field - self.dtype = proxy.dtype[field] - self.shape = proxy.shape + self._dtype = proxy.dtype[field] + self._shape = proxy.shape + + @property + def dtype(self) -> np.dtype: + """ + Get the data type of the :ref:`ProxyNDField`. + + Returns + ------- + out: np.dtype + The data type of the :ref:`ProxyNDField`. + """ + return self._dtype + + @property + def shape(self) -> tuple[int]: + """ + Get the shape of the :ref:`ProxyNDField`. + + Returns + ------- + out: tuple + The shape of the :ref:`ProxyNDField`. + """ + return self._shape def __getitem__(self, item: slice | list[slice]) -> np.ndarray: """ diff --git a/tests/ndarray/test_lazyudf.py b/tests/ndarray/test_lazyudf.py index be53e175..55f2180a 100644 --- a/tests/ndarray/test_lazyudf.py +++ b/tests/ndarray/test_lazyudf.py @@ -411,8 +411,8 @@ def test_offset(shape, chunks, blocks, slices, chunked_eval, eval_mode): ], ) def test_clip_logaddexp(shape, chunks, blocks, slices): - npa = np.arange(0, np.prod(shape)).reshape(shape) - npb = np.arange(1, np.prod(shape) + 1).reshape(shape) + npa = np.arange(0, np.prod(shape), dtype=np.float64).reshape(shape) + npb = np.arange(1, np.prod(shape) + 1, dtype=np.int64).reshape(shape) b = blosc2.asarray(npb) a = blosc2.asarray(npa) @@ -441,3 +441,17 @@ def test_clip_logaddexp(shape, chunks, blocks, slices): expr = blosc2.lazyexpr("logaddexp(a, b)") res = expr.compute(item=slices) np.testing.assert_allclose(res[...], npc[slices]) + + # Test LazyUDF has inherited __add__ from Operand class + expr = blosc2.logaddexp(b, a) + blosc2.clip(b, np.prod(shape) // 3, npb - 10) + npc = np.logaddexp(npb, npa) + np.clip(npb, np.prod(shape) // 3, npb - 10) + res = expr.compute(item=slices) + np.testing.assert_allclose(res[...], npc[slices]) + + # Test LazyUDF more + expr = blosc2.evaluate("logaddexp(b, a) + clip(b, np.prod(shape) // 3, npb - 10)") + np.testing.assert_allclose(expr, npc) + expr = blosc2.evaluate("sin(logaddexp(b, a))") + np.testing.assert_allclose(expr, np.sin(np.logaddexp(npb, npa))) + expr = blosc2.evaluate("clip(logaddexp(b, a), 6, 12)") + np.testing.assert_allclose(expr, np.clip(np.logaddexp(npb, npa), 6, 12))