From e1e3dba204c7b968ea66aa2818fbaae87966efa8 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Wed, 3 Dec 2025 10:49:10 +0100 Subject: [PATCH 1/2] Allow saving of numba-decorated lazyudfs --- src/blosc2/__init__.py | 6 ++++++ src/blosc2/lazyexpr.py | 17 +++++++++++++++-- tests/ndarray/test_lazyudf.py | 21 +++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index 39e42c82..6dac907c 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -15,6 +15,12 @@ import numpy as np +_NUMBA_ = True +try: + import numba +except ImportError: + _NUMBA_ = False + # Do the platform check once at module level IS_WASM = platform.machine() == "wasm32" # IS_WASM = True # for testing (comment this line out for production) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 8a02a79b..d5e74b95 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -20,6 +20,7 @@ import pathlib import re import sys +import textwrap import threading from abc import ABC, abstractmethod, abstractproperty from dataclasses import asdict @@ -39,6 +40,9 @@ import numpy as np import blosc2 + +if blosc2._NUMBA_: + import numba from blosc2 import compute_chunks_blocks from blosc2.info import InfoReporter @@ -3409,7 +3413,7 @@ def save(self, urlpath=None, **kwargs): raise ValueError("To save a LazyArray, all operands must be stored on disk/network") operands[key] = value.schunk.urlpath array.schunk.vlmeta["_LazyArray"] = { - "UDF": inspect.getsource(self.func), + "UDF": textwrap.dedent(inspect.getsource(self.func)).lstrip(), "operands": operands, "name": self.func.__name__, } @@ -3712,11 +3716,20 @@ def _open_lazyarray(array): local_ns = {} name = lazyarray["name"] filename = f"<{name}>" # any unique name + SAFE_GLOBALS = { + "__builtins__": { + name: value for name, value in builtins.__dict__.items() if name != "__import__" + }, + "np": np, + "blosc2": blosc2, + } + if blosc2._NUMBA_: + SAFE_GLOBALS["numba"] = numba # Register the source so inspect can find it linecache.cache[filename] = (len(expr), None, expr.splitlines(True), filename) - exec(compile(expr, filename, "exec"), {"np": np, "blosc2": blosc2}, local_ns) + exec(compile(expr, filename, "exec"), SAFE_GLOBALS, local_ns) func = local_ns[name] # TODO: make more robust for general kwargs (not just cparams) new_expr = blosc2.lazyudf( diff --git a/tests/ndarray/test_lazyudf.py b/tests/ndarray/test_lazyudf.py index 1651c74a..03242295 100644 --- a/tests/ndarray/test_lazyudf.py +++ b/tests/ndarray/test_lazyudf.py @@ -18,6 +18,15 @@ def udf1p(inputs_tuple, output, offset): output[:] = x + 1 +if blosc2._NUMBA_: + import numba + + @numba.jit(parallel=True) + def udf1p_numba(inputs_tuple, output, offset): + x = inputs_tuple[0] + output[:] = x + 1 + + @pytest.mark.parametrize("chunked_eval", [True, False]) @pytest.mark.parametrize( ("shape", "chunks", "blocks"), @@ -472,6 +481,18 @@ def test_save_ludf(): assert isinstance(expr, blosc2.LazyUDF) res_lazyexpr = expr.compute() np.testing.assert_array_equal(res_lazyexpr[:], npc) + blosc2.remove_urlpath(urlpath) + + if blosc2._NUMBA_: + expr = blosc2.lazyudf(udf1p_numba, (array,), np.float64) + expr.save(urlpath=urlpath) + del expr + expr = blosc2.open(urlpath) + assert isinstance(expr, blosc2.LazyUDF) + res_lazyexpr = expr.compute() + np.testing.assert_array_equal(res_lazyexpr[:], npc) + + blosc2.remove_urlpath("a.b2nd") # Test get_chunk method From 75e964737c4b8d8d94cc990b2247a0246126d244 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Wed, 3 Dec 2025 11:35:52 +0100 Subject: [PATCH 2/2] Rename NUMBA flag --- src/blosc2/__init__.py | 7 ++++--- src/blosc2/lazyexpr.py | 4 ++-- tests/ndarray/test_lazyudf.py | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index 6dac907c..12a2a908 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -15,12 +15,13 @@ import numpy as np -_NUMBA_ = True +_HAS_NUMBA = False try: import numba -except ImportError: - _NUMBA_ = False + _HAS_NUMBA = True +except ImportError: + pass # Do the platform check once at module level IS_WASM = platform.machine() == "wasm32" # IS_WASM = True # for testing (comment this line out for production) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index d5e74b95..7613c9f2 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -41,7 +41,7 @@ import blosc2 -if blosc2._NUMBA_: +if blosc2._HAS_NUMBA: import numba from blosc2 import compute_chunks_blocks from blosc2.info import InfoReporter @@ -3723,7 +3723,7 @@ def _open_lazyarray(array): "np": np, "blosc2": blosc2, } - if blosc2._NUMBA_: + if blosc2._HAS_NUMBA: SAFE_GLOBALS["numba"] = numba # Register the source so inspect can find it diff --git a/tests/ndarray/test_lazyudf.py b/tests/ndarray/test_lazyudf.py index 03242295..29b44e39 100644 --- a/tests/ndarray/test_lazyudf.py +++ b/tests/ndarray/test_lazyudf.py @@ -18,7 +18,7 @@ def udf1p(inputs_tuple, output, offset): output[:] = x + 1 -if blosc2._NUMBA_: +if blosc2._HAS_NUMBA: import numba @numba.jit(parallel=True) @@ -483,7 +483,7 @@ def test_save_ludf(): np.testing.assert_array_equal(res_lazyexpr[:], npc) blosc2.remove_urlpath(urlpath) - if blosc2._NUMBA_: + if blosc2._HAS_NUMBA: expr = blosc2.lazyudf(udf1p_numba, (array,), np.float64) expr.save(urlpath=urlpath) del expr