diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index 39e42c82..12a2a908 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -15,6 +15,13 @@ import numpy as np +_HAS_NUMBA = False +try: + import numba + + _HAS_NUMBA = True +except ImportError: + pass # Do the platform check once at module level IS_WASM = platform.machine() == "wasm32" # IS_WASM = True # for testing (comment this line out for production) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 8a02a79b..7613c9f2 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -20,6 +20,7 @@ import pathlib import re import sys +import textwrap import threading from abc import ABC, abstractmethod, abstractproperty from dataclasses import asdict @@ -39,6 +40,9 @@ import numpy as np import blosc2 + +if blosc2._HAS_NUMBA: + import numba from blosc2 import compute_chunks_blocks from blosc2.info import InfoReporter @@ -3409,7 +3413,7 @@ def save(self, urlpath=None, **kwargs): raise ValueError("To save a LazyArray, all operands must be stored on disk/network") operands[key] = value.schunk.urlpath array.schunk.vlmeta["_LazyArray"] = { - "UDF": inspect.getsource(self.func), + "UDF": textwrap.dedent(inspect.getsource(self.func)).lstrip(), "operands": operands, "name": self.func.__name__, } @@ -3712,11 +3716,20 @@ def _open_lazyarray(array): local_ns = {} name = lazyarray["name"] filename = f"<{name}>" # any unique name + SAFE_GLOBALS = { + "__builtins__": { + name: value for name, value in builtins.__dict__.items() if name != "__import__" + }, + "np": np, + "blosc2": blosc2, + } + if blosc2._HAS_NUMBA: + SAFE_GLOBALS["numba"] = numba # Register the source so inspect can find it linecache.cache[filename] = (len(expr), None, expr.splitlines(True), filename) - exec(compile(expr, filename, "exec"), {"np": np, "blosc2": blosc2}, local_ns) + exec(compile(expr, filename, "exec"), SAFE_GLOBALS, local_ns) func = local_ns[name] # TODO: make more robust for general kwargs (not just cparams) new_expr = blosc2.lazyudf( diff --git a/tests/ndarray/test_lazyudf.py b/tests/ndarray/test_lazyudf.py index 1651c74a..29b44e39 100644 --- a/tests/ndarray/test_lazyudf.py +++ b/tests/ndarray/test_lazyudf.py @@ -18,6 +18,15 @@ def udf1p(inputs_tuple, output, offset): output[:] = x + 1 +if blosc2._HAS_NUMBA: + import numba + + @numba.jit(parallel=True) + def udf1p_numba(inputs_tuple, output, offset): + x = inputs_tuple[0] + output[:] = x + 1 + + @pytest.mark.parametrize("chunked_eval", [True, False]) @pytest.mark.parametrize( ("shape", "chunks", "blocks"), @@ -472,6 +481,18 @@ def test_save_ludf(): assert isinstance(expr, blosc2.LazyUDF) res_lazyexpr = expr.compute() np.testing.assert_array_equal(res_lazyexpr[:], npc) + blosc2.remove_urlpath(urlpath) + + if blosc2._HAS_NUMBA: + expr = blosc2.lazyudf(udf1p_numba, (array,), np.float64) + expr.save(urlpath=urlpath) + del expr + expr = blosc2.open(urlpath) + assert isinstance(expr, blosc2.LazyUDF) + res_lazyexpr = expr.compute() + np.testing.assert_array_equal(res_lazyexpr[:], npc) + + blosc2.remove_urlpath("a.b2nd") # Test get_chunk method