diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index aa6b45f2..39e42c82 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -438,6 +438,7 @@ def _raise(exc): lazyudf, lazyexpr, LazyArray, + LazyUDF, _open_lazyarray, get_expr_operands, validate_expr, @@ -450,7 +451,7 @@ def _raise(exc): from .schunk import SChunk, open from . import linalg from .linalg import tensordot, vecdot, permute_dims, matrix_transpose, matmul, transpose, diagonal, outer -from .shape_utils import linalg_funcs as linalg_funcs_list +from .utils import linalg_funcs as linalg_funcs_list from . import fft # Registry for postfilters @@ -613,6 +614,7 @@ def _raise(exc): "Filter", "LazyArray", "LazyExpr", + "LazyUDF", "NDArray", "NDField", "Operand", diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index fe91c2ea..58b6325b 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -40,16 +40,22 @@ import blosc2 from blosc2 import compute_chunks_blocks from blosc2.info import InfoReporter -from blosc2.ndarray import ( + +from .proxy import _convert_dtype +from .utils import ( NUMPY_GE_2_0, + constructors, + elementwise_funcs, get_chunks_idx, get_intersecting_chunks, + infer_shape, + linalg_attrs, + linalg_funcs, + npvecdot, process_key, + reducers, ) -from .proxy import _convert_dtype -from .shape_utils import constructors, elementwise_funcs, infer_shape, linalg_attrs, linalg_funcs, reducers - if not blosc2.IS_WASM: import numexpr @@ -78,7 +84,7 @@ safe_numpy_globals["bitwise_invert"] = np.bitwise_not safe_numpy_globals["concat"] = np.concatenate safe_numpy_globals["matrix_transpose"] = np.transpose - safe_numpy_globals["vecdot"] = blosc2.ndarray.npvecdot + safe_numpy_globals["vecdot"] = npvecdot def ne_evaluate(expression, local_dict=None, **kwargs): diff --git a/src/blosc2/linalg.py b/src/blosc2/linalg.py index cc2a3c76..4066f76a 100644 --- a/src/blosc2/linalg.py +++ b/src/blosc2/linalg.py @@ -9,7 +9,8 @@ import numpy as np import blosc2 -from blosc2.ndarray import get_intersecting_chunks, nptranspose, npvecdot, slice_to_chunktuple + +from .utils import get_intersecting_chunks, nptranspose, npvecdot, slice_to_chunktuple if TYPE_CHECKING: from collections.abc import Sequence diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index fb1ff038..41367ea6 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -27,31 +27,23 @@ import ndindex import numpy as np -from ndindex.subindex_helpers import ceiling import blosc2 from blosc2 import SpecialValue, blosc2_ext, compute_chunks_blocks from blosc2.info import InfoReporter from blosc2.schunk import SChunk -# NumPy version and a convenient boolean flag -NUMPY_GE_2_0 = np.__version__ >= "2.0" -# handle different numpy versions -if NUMPY_GE_2_0: # array-api compliant - nplshift = np.bitwise_left_shift - nprshift = np.bitwise_right_shift - npbinvert = np.bitwise_invert - npvecdot = np.vecdot - nptranspose = np.permute_dims -else: # not array-api compliant - nplshift = np.left_shift - nprshift = np.right_shift - npbinvert = np.bitwise_not - nptranspose = np.transpose - - def npvecdot(a, b, axis=-1): - return np.einsum("...i,...i->...", np.moveaxis(np.conj(a), axis, -1), np.moveaxis(b, axis, -1)) - +from .linalg import matmul +from .utils import ( + _get_local_slice, + _get_selection, + get_chunks_idx, + npbinvert, + nplshift, + nprshift, + process_key, + slice_to_chunktuple, +) # These functions in ufunc_map in ufunc_map_1param are implemented in numexpr and so we call # those instead (since numexpr uses multithreading it is faster) @@ -179,15 +171,6 @@ def make_key_hashable(key): return key -def process_key(key, shape): - key = ndindex.ndindex(key).expand(shape).raw - mask = tuple( - isinstance(k, int) for k in key - ) # mask to track dummy dims introduced by int -> slice(k, k+1) - key = tuple(slice(k, k + 1, None) if isinstance(k, int) else k for k in key) # key is slice, None, int - return key, mask - - def get_ndarray_start_stop(ndim, key, shape): # key should be Nones and slices none_mask, start, stop, step = [], [], [], [] @@ -265,12 +248,6 @@ def check_contiguity(shape, part): return check_contiguity(shape, chunks) -def get_chunks_idx(shape, chunks): - chunks_idx = tuple(math.ceil(s / c) for s, c in zip(shape, chunks, strict=True)) - nchunks = math.prod(chunks_idx) - return chunks_idx, nchunks - - def get_flat_slices_orig(shape: tuple[int], s: tuple[slice, ...]) -> list[slice]: """ From array with `shape`, get the flattened list of slices corresponding to `s`. @@ -3074,6 +3051,7 @@ def chunkwise_logaddexp(inputs, output, offset): np.logical_and: logical_and, np.logical_or: logical_or, np.logical_xor: logical_xor, + np.matmul: matmul, } @@ -6320,132 +6298,6 @@ def take_along_axis(x: blosc2.Array, indices: blosc2.Array, axis: int = -1) -> N return blosc2.asarray(x[key]) -class MyChunkRange: - def __init__(self, start, stop, step=1, n=1): - self.start = start - self.stop = stop - self.step = step - self.n = n - - def __iter__(self): - for k in range(math.ceil((self.stop - self.start) / self.step)): - yield (self.start + k * self.step) // self.n - - -def slice_to_chunktuple(s, n): - # Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks. - start, stop, step = s.start, s.stop, s.step - if step < 0: - temp = stop - stop = start + 1 - start = temp + 1 - step = -step # get positive steps - if step > n: - return MyChunkRange(start, stop, step, n) - else: - return range(start // n, ceiling(stop, n)) - - -def _get_selection(ctuple, ptuple, chunks): - # we assume that at least one element of chunk intersects with the slice - # (as a consequence of only looping over intersecting chunks) - # ptuple is global slice, ctuple is chunk coords (in units of chunks) - pselection = () - for i, s, csize in zip(ctuple, ptuple, chunks, strict=True): - # we need to advance to first element within chunk that intersects with slice, not - # necessarily the first element of chunk - # i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step] - if s.step > 0: - np1 = (i * csize - s.start + s.step - 1) // s.step # gives (n + 1) - # can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk - pselection += ( - slice( - builtins.max( - s.start, s.start + np1 * s.step - ), # start+(n+1)*step gives i*csize if k=step - builtins.min(csize * (i + 1), s.stop), - s.step, - ), - ) - else: - # (i + 1) * csize = s.start + n*step + k, already added n+1 elements, k in [step+1, 0] - np1 = ((i + 1) * csize - s.start + s.step) // s.step # gives (n + 1) - # can have n = -1 if s.start < (i + 1) * csize, but never < -1 since have to intersect with chunk - pselection += ( - slice( - builtins.min(s.start, s.start + np1 * s.step), # start+n*step gives (i+1)*csize if k=0 - builtins.max(csize * i - 1, s.stop), # want to include csize * i - s.step, - ), - ) - - # selection relative to coordinates of out (necessarily out_step = 1 as we work through out chunk-by-chunk of self) - # when added n + 1 elements - # ps.start = pt.start + step * (n+1) => n = (ps.start - pt.start - sign) // step - # hence, out_start = n + 1 - # ps.stop = pt.start + step * (out_stop - 1) + k, k in [step, -1] or [1, step] - # => out_stop = (ps.stop - pt.start - sign) // step + 1 - out_pselection = () - i = 0 - for ps, pt in zip(pselection, ptuple, strict=True): - sign_ = np.sign(pt.step) - n = (ps.start - pt.start - sign_) // pt.step - out_start = n + 1 - # ps.stop always positive except for case where get full array (it is then -1 since desire 0th element) - out_stop = None if ps.stop == -1 else (ps.stop - pt.start - sign_) // pt.step + 1 - out_pselection += ( - slice( - out_start, - out_stop, - 1, - ), - ) - i += 1 - - loc_selection = tuple( # is s.stop is None, get whole chunk so s.start - 0 - slice(0, s.stop - s.start, s.step) - if s.step > 0 - else slice(s.start if s.stop == -1 else s.start - s.stop, None, s.step) - for s in pselection - ) # local coords of loaded part of chunk - - return out_pselection, pselection, loc_selection - - -def _get_local_slice(prior_selection, post_selection, chunk_bounds): - chunk_begin, chunk_end = chunk_bounds - # +1 for negative steps as have to include start (exclude stop) - locbegin = np.hstack( - ( - [s.start if s.step > 0 else s.stop + 1 for s in prior_selection], - chunk_begin, - [s.start if s.step > 0 else s.stop + 1 for s in post_selection], - ), - casting="unsafe", - dtype="int64", - ) - locend = np.hstack( - ( - [s.stop if s.step > 0 else s.start + 1 for s in prior_selection], - chunk_end, - [s.stop if s.step > 0 else s.start + 1 for s in post_selection], - ), - casting="unsafe", - dtype="int64", - ) - return locbegin, locend - - -def get_intersecting_chunks(_slice, shape, chunks): - if 0 not in chunks: - chunk_size = ndindex.ChunkSize(chunks) - return chunk_size.as_subchunks(_slice, shape) # if _slice is (), returns all chunks - else: - return ( - ndindex.ndindex(...).expand(shape), - ) # chunk is whole array so just return full tuple to do loop once - - def broadcast_to(arr: blosc2.Array, shape: tuple[int, ...]) -> NDArray: """ Broadcast an array to a new shape. diff --git a/src/blosc2/shape_utils.py b/src/blosc2/utils.py similarity index 75% rename from src/blosc2/shape_utils.py rename to src/blosc2/utils.py index c7c6c2b9..db611ea2 100644 --- a/src/blosc2/shape_utils.py +++ b/src/blosc2/utils.py @@ -1,9 +1,32 @@ import ast import builtins +import math import warnings +import ndindex +import numpy as np +from ndindex.subindex_helpers import ceiling from numpy import broadcast_shapes +# NumPy version and a convenient boolean flag +NUMPY_GE_2_0 = np.__version__ >= "2.0" +# handle different numpy versions +if NUMPY_GE_2_0: # array-api compliant + nplshift = np.bitwise_left_shift + nprshift = np.bitwise_right_shift + npbinvert = np.bitwise_invert + npvecdot = np.vecdot + nptranspose = np.permute_dims +else: # not array-api compliant + nplshift = np.left_shift + nprshift = np.right_shift + npbinvert = np.bitwise_not + nptranspose = np.transpose + + def npvecdot(a, b, axis=-1): + return np.einsum("...i,...i->...", np.moveaxis(np.conj(a), axis, -1), np.moveaxis(b, axis, -1)) + + elementwise_funcs = [ "abs", "acos", @@ -610,3 +633,144 @@ def infer_shape(expr, shapes): tree = ast.parse(expr, mode="eval") inferencer = ShapeInferencer(shapes) return inferencer.visit(tree.body) + + +class MyChunkRange: + def __init__(self, start, stop, step=1, n=1): + self.start = start + self.stop = stop + self.step = step + self.n = n + + def __iter__(self): + for k in range(math.ceil((self.stop - self.start) / self.step)): + yield (self.start + k * self.step) // self.n + + +def slice_to_chunktuple(s, n): + # Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks. + start, stop, step = s.start, s.stop, s.step + if step < 0: + temp = stop + stop = start + 1 + start = temp + 1 + step = -step # get positive steps + if step > n: + return MyChunkRange(start, stop, step, n) + else: + return range(start // n, ceiling(stop, n)) + + +def _get_selection(ctuple, ptuple, chunks): + # we assume that at least one element of chunk intersects with the slice + # (as a consequence of only looping over intersecting chunks) + # ptuple is global slice, ctuple is chunk coords (in units of chunks) + pselection = () + for i, s, csize in zip(ctuple, ptuple, chunks, strict=True): + # we need to advance to first element within chunk that intersects with slice, not + # necessarily the first element of chunk + # i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step] + if s.step > 0: + np1 = (i * csize - s.start + s.step - 1) // s.step # gives (n + 1) + # can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk + pselection += ( + slice( + builtins.max( + s.start, s.start + np1 * s.step + ), # start+(n+1)*step gives i*csize if k=step + builtins.min(csize * (i + 1), s.stop), + s.step, + ), + ) + else: + # (i + 1) * csize = s.start + n*step + k, already added n+1 elements, k in [step+1, 0] + np1 = ((i + 1) * csize - s.start + s.step) // s.step # gives (n + 1) + # can have n = -1 if s.start < (i + 1) * csize, but never < -1 since have to intersect with chunk + pselection += ( + slice( + builtins.min(s.start, s.start + np1 * s.step), # start+n*step gives (i+1)*csize if k=0 + builtins.max(csize * i - 1, s.stop), # want to include csize * i + s.step, + ), + ) + + # selection relative to coordinates of out (necessarily out_step = 1 as we work through out chunk-by-chunk of self) + # when added n + 1 elements + # ps.start = pt.start + step * (n+1) => n = (ps.start - pt.start - sign) // step + # hence, out_start = n + 1 + # ps.stop = pt.start + step * (out_stop - 1) + k, k in [step, -1] or [1, step] + # => out_stop = (ps.stop - pt.start - sign) // step + 1 + out_pselection = () + i = 0 + for ps, pt in zip(pselection, ptuple, strict=True): + sign_ = np.sign(pt.step) + n = (ps.start - pt.start - sign_) // pt.step + out_start = n + 1 + # ps.stop always positive except for case where get full array (it is then -1 since desire 0th element) + out_stop = None if ps.stop == -1 else (ps.stop - pt.start - sign_) // pt.step + 1 + out_pselection += ( + slice( + out_start, + out_stop, + 1, + ), + ) + i += 1 + + loc_selection = tuple( # is s.stop is None, get whole chunk so s.start - 0 + slice(0, s.stop - s.start, s.step) + if s.step > 0 + else slice(s.start if s.stop == -1 else s.start - s.stop, None, s.step) + for s in pselection + ) # local coords of loaded part of chunk + + return out_pselection, pselection, loc_selection + + +def _get_local_slice(prior_selection, post_selection, chunk_bounds): + chunk_begin, chunk_end = chunk_bounds + # +1 for negative steps as have to include start (exclude stop) + locbegin = np.hstack( + ( + [s.start if s.step > 0 else s.stop + 1 for s in prior_selection], + chunk_begin, + [s.start if s.step > 0 else s.stop + 1 for s in post_selection], + ), + casting="unsafe", + dtype="int64", + ) + locend = np.hstack( + ( + [s.stop if s.step > 0 else s.start + 1 for s in prior_selection], + chunk_end, + [s.stop if s.step > 0 else s.start + 1 for s in post_selection], + ), + casting="unsafe", + dtype="int64", + ) + return locbegin, locend + + +def get_intersecting_chunks(_slice, shape, chunks): + if 0 not in chunks: + chunk_size = ndindex.ChunkSize(chunks) + return chunk_size.as_subchunks(_slice, shape) # if _slice is (), returns all chunks + else: + return ( + ndindex.ndindex(...).expand(shape), + ) # chunk is whole array so just return full tuple to do loop once + + +def get_chunks_idx(shape, chunks): + chunks_idx = tuple(math.ceil(s / c) for s, c in zip(shape, chunks, strict=True)) + nchunks = math.prod(chunks_idx) + return chunks_idx, nchunks + + +def process_key(key, shape): + key = ndindex.ndindex(key).expand(shape).raw + mask = tuple( + isinstance(k, int) for k in key + ) # mask to track dummy dims introduced by int -> slice(k, k+1) + key = tuple(slice(k, k + 1, None) if isinstance(k, int) else k for k in key) # key is slice, None, int + return key, mask diff --git a/tests/ndarray/test_concat.py b/tests/ndarray/test_concat.py index 0705b6c4..9a00e4b4 100644 --- a/tests/ndarray/test_concat.py +++ b/tests/ndarray/test_concat.py @@ -10,7 +10,7 @@ import pytest import blosc2 -from blosc2.ndarray import NUMPY_GE_2_0 +from blosc2.utils import NUMPY_GE_2_0 if NUMPY_GE_2_0: # handle different versions of numpy npconcat = np.concat diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index f0ef1ff4..963916c4 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -14,7 +14,7 @@ import blosc2 from blosc2.lazyexpr import ne_evaluate -from blosc2.ndarray import get_chunks_idx, npvecdot +from blosc2.utils import get_chunks_idx, npvecdot NITEMS_SMALL = 100 NITEMS = 1000 diff --git a/tests/ndarray/test_linalg.py b/tests/ndarray/test_linalg.py index ab1102cc..9c1a110d 100644 --- a/tests/ndarray/test_linalg.py +++ b/tests/ndarray/test_linalg.py @@ -7,7 +7,7 @@ import blosc2 from blosc2.lazyexpr import linalg_funcs -from blosc2.ndarray import npvecdot +from blosc2.utils import npvecdot @pytest.mark.parametrize( diff --git a/tests/ndarray/test_proxy.py b/tests/ndarray/test_proxy.py index 0b49d268..eb55f32c 100644 --- a/tests/ndarray/test_proxy.py +++ b/tests/ndarray/test_proxy.py @@ -10,7 +10,7 @@ import pytest import blosc2 -from blosc2.ndarray import get_chunks_idx +from blosc2.utils import get_chunks_idx argnames = "urlpath, shape, chunks, blocks, slices, dtype" argvalues = [