diff --git a/numba_dppy/dppy_host_fn_call_gen.py b/numba_dppy/dppy_host_fn_call_gen.py
index 7d1c9bcea4..2808ddf90d 100644
--- a/numba_dppy/dppy_host_fn_call_gen.py
+++ b/numba_dppy/dppy_host_fn_call_gen.py
@@ -52,7 +52,7 @@ def _init_llvm_types_and_constants(self):
         self.byte_ptr_t = lc.Type.pointer(self.byte_t)
         self.byte_ptr_ptr_t = lc.Type.pointer(self.byte_ptr_t)
         self.intp_t = self.context.get_value_type(types.intp)
-        self.long_t = self.context.get_value_type(types.int64)
+        self.int64_t = self.context.get_value_type(types.int64)
         self.int32_t = self.context.get_value_type(types.int32)
         self.int32_ptr_t = lc.Type.pointer(self.int32_t)
         self.uintp_t = self.context.get_value_type(types.uintp)
@@ -113,23 +113,26 @@ def allocate_kenrel_arg_array(self, num_kernel_args):
 
     def resolve_and_return_dpctl_type(self, ty):
+        """Look up the DPCTLKernelArgType enum value dpctl defines for a Numba type.
+        """
+        val = None
         if ty == types.int32 or isinstance(ty, types.scalars.IntegerLiteral):
-            val = self.context.get_constant(types.int32, 4)
+            val = self.context.get_constant(types.int32, 9)  # DPCTL_LONG_LONG
         elif ty == types.uint32:
-            val = self.context.get_constant(types.int32, 5)
+            val = self.context.get_constant(types.int32, 10)  # DPCTL_UNSIGNED_LONG_LONG
         elif ty == types.boolean:
-            val = self.context.get_constant(types.int32, 5)
+            val = self.context.get_constant(types.int32, 5)  # DPCTL_UNSIGNED_INT
         elif ty == types.int64:
-            val = self.context.get_constant(types.int32, 7)
+            val = self.context.get_constant(types.int32, 9)  # DPCTL_LONG_LONG
         elif ty == types.uint64:
-            val = self.context.get_constant(types.int32, 8)
+            val = self.context.get_constant(types.int32, 11)  # DPCTL_SIZE_T
         elif ty == types.float32:
-            val = self.context.get_constant(types.int32, 12)
+            val = self.context.get_constant(types.int32, 12)  # DPCTL_FLOAT
         elif ty == types.float64:
-            val = self.context.get_constant(types.int32, 13)
+            val = self.context.get_constant(types.int32, 13)  # DPCTL_DOUBLE
         elif ty == types.voidptr:
-            val = self.context.get_constant(types.int32, 15)
+            val = self.context.get_constant(types.int32, 15)  # DPCTL_VOID_PTR
         else:
             raise NotImplementedError
@@ -151,12 +154,12 @@ def process_kernel_arg(self, var, llvm_arg, arg_type, gu_sig, val_type, index, m
             if llvm_arg is None:
                 raise NotImplementedError(arg_type, var)
 
-            storage = cgutils.alloca_once(self.builder, self.long_t)
+            storage = cgutils.alloca_once(self.builder, self.int64_t)
             self.builder.store(self.context.get_constant(types.int64, 0), storage)
             ty = self.resolve_and_return_dpctl_type(types.int64)
             self.form_kernel_arg_and_arg_ty(self.builder.bitcast(storage, self.void_ptr_t), ty)
 
-            storage = cgutils.alloca_once(self.builder, self.long_t)
+            storage = cgutils.alloca_once(self.builder, self.int64_t)
             self.builder.store(self.context.get_constant(types.int64, 0), storage)
             ty = self.resolve_and_return_dpctl_type(types.int64)
             self.form_kernel_arg_and_arg_ty(self.builder.bitcast(storage, self.void_ptr_t), ty)
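
Note: the corrected magic numbers above come from dpctl's DPCTLKernelArgType C enum. As a reading aid only, here is a hedged Python mirror of the values this patch relies on, inferred from the inline comments in the hunk above; dpctl's own C API headers remain the source of truth.

from enum import IntEnum

class DPCTLKernelArgType(IntEnum):
    # Values inferred from the comments in the patch above; verify
    # against dpctl's headers before relying on them elsewhere.
    DPCTL_UNSIGNED_INT = 5         # types.boolean
    DPCTL_LONG_LONG = 9            # types.int32, IntegerLiteral, types.int64
    DPCTL_UNSIGNED_LONG_LONG = 10  # types.uint32
    DPCTL_SIZE_T = 11              # types.uint64
    DPCTL_FLOAT = 12               # types.float32
    DPCTL_DOUBLE = 13              # types.float64
    DPCTL_VOID_PTR = 15            # types.voidptr
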
diff --git a/numba_dppy/examples/pa_examples/test1-2d.py b/numba_dppy/examples/pa_examples/test1-2d.py
index 7985216aba..df3849b30d 100644
--- a/numba_dppy/examples/pa_examples/test1-2d.py
+++ b/numba_dppy/examples/pa_examples/test1-2d.py
@@ -1,23 +1,29 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 1000
 print("N", N)
 
-a = np.ones((N,N), dtype=np.float32)
-b = np.ones((N,N), dtype=np.float32)
+a = np.ones((N, N), dtype=np.float32)
+b = np.ones((N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
-        if c[i,j] != 2.0:
+        if c[i, j] != 2.0:
             print("First index not equal to 2.0 was", i, j)
             break
diff --git a/numba_dppy/examples/pa_examples/test1-3d.py b/numba_dppy/examples/pa_examples/test1-3d.py
index 1304c0762a..a69aa0cbc5 100644
--- a/numba_dppy/examples/pa_examples/test1-3d.py
+++ b/numba_dppy/examples/pa_examples/test1-3d.py
@@ -1,24 +1,30 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 10
 print("N", N)
 
-a = np.ones((N,N,N), dtype=np.float32)
-b = np.ones((N,N,N), dtype=np.float32)
+a = np.ones((N, N, N), dtype=np.float32)
+b = np.ones((N, N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
         for k in range(N):
-            if c[i,j,k] != 2.0:
+            if c[i, j, k] != 2.0:
                 print("First index not equal to 2.0 was", i, j, k)
                 break
diff --git a/numba_dppy/examples/pa_examples/test1-4d.py b/numba_dppy/examples/pa_examples/test1-4d.py
index bb52da28de..2647d0e66e 100644
--- a/numba_dppy/examples/pa_examples/test1-4d.py
+++ b/numba_dppy/examples/pa_examples/test1-4d.py
@@ -1,25 +1,31 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 10
 print("N", N)
 
-a = np.ones((N,N,N,N), dtype=np.float32)
-b = np.ones((N,N,N,N), dtype=np.float32)
+a = np.ones((N, N, N, N), dtype=np.float32)
+b = np.ones((N, N, N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
         for k in range(N):
             for l in range(N):
-                if c[i,j,k,l] != 2.0:
+                if c[i, j, k, l] != 2.0:
                     print("First index not equal to 2.0 was", i, j, k, l)
                     break
diff --git a/numba_dppy/examples/pa_examples/test1-5d.py b/numba_dppy/examples/pa_examples/test1-5d.py
index e795dbe602..893fe3b6a6 100644
--- a/numba_dppy/examples/pa_examples/test1-5d.py
+++ b/numba_dppy/examples/pa_examples/test1-5d.py
@@ -1,26 +1,32 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 5
 print("N", N)
 
-a = np.ones((N,N,N,N,N), dtype=np.float32)
-b = np.ones((N,N,N,N,N), dtype=np.float32)
+a = np.ones((N, N, N, N, N), dtype=np.float32)
+b = np.ones((N, N, N, N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
         for k in range(N):
             for l in range(N):
                 for m in range(N):
-                    if c[i,j,k,l,m] != 2.0:
+                    if c[i, j, k, l, m] != 2.0:
                         print("First index not equal to 2.0 was", i, j, k, l, m)
                         break
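
Each of the 2-D through 5-D examples above (and test1.py below) migrates the same way: the removed parallel={'offload':True} flag is replaced by calling the plain @njit function while a dpctl queue is active. A minimal standalone sketch of the pattern, not part of the patch; the filter string "opencl:gpu:0" is taken from the examples, and any filter supported by the local dpctl build should work:

import dpctl
import numpy as np
from numba import njit

@njit
def vec_add(a, b):
    return a + b

a = np.ones(1024, dtype=np.float32)
b = np.ones(1024, dtype=np.float32)

# Offload is now decided by the queue that is active at call time,
# not by a compile-time flag on @njit.
with dpctl.device_context("opencl:gpu:0"):
    c = vec_add(a, b)
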
diff --git a/numba_dppy/examples/pa_examples/test1.py b/numba_dppy/examples/pa_examples/test1.py
index 1620654cf8..01209b3309 100644
--- a/numba_dppy/examples/pa_examples/test1.py
+++ b/numba_dppy/examples/pa_examples/test1.py
@@ -1,8 +1,9 @@
 from numba import njit
 import numpy as np
+import dpctl
 
 
-@njit(parallel={'offload':True})
+@njit
 def f1(a, b):
     c = a + b
     return c
@@ -19,7 +20,10 @@ def main():
     print("a:", a, hex(a.ctypes.data))
     print("b:", b, hex(b.ctypes.data))
 
-    c = f1(a,b)
+
+    with dpctl.device_context("opencl:gpu:0"):
+        c = f1(a, b)
+
     print("RESULT c:", c, hex(c.ctypes.data))
 
     for i in range(N):
         if c[i] != 2.0:
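
The test-suite diffs that follow apply the same migration and route every jitted call through dpctl.device_context while the NumPy reference runs on the host. As a hedged illustration of how the helpers defined in the next diff are driven (the helper signatures are taken from the diff below; the import path assumes numba_dppy's tests are importable as a package, and the concrete arguments are only examples):

import numpy as np
from numba import njit
from numba_dppy.tests.test_dpnp_functions import (
    test_for_different_datatypes, test_for_dimensions)

@njit
def f(a):
    return np.sum(a)

# Compiles f once per dtype, runs it under dpctl.device_context and
# compares against the eager host result of np.sum.
test_for_different_datatypes(f, np.sum, [10], 1, [np.float32, np.float64])
test_for_dimensions(f, np.sum, [10, 2, 3], [np.float32])
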
diff --git a/numba_dppy/tests/test_dpnp_functions.py b/numba_dppy/tests/test_dpnp_functions.py
index b0837f5ba6..c4749885ba 100644
--- a/numba_dppy/tests/test_dpnp_functions.py
+++ b/numba_dppy/tests/test_dpnp_functions.py
@@ -5,7 +5,9 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
@@ -14,10 +16,14 @@ def test_for_different_datatypes(fn, test_fn, dims, arg_count, tys, np_all=False
     if arg_count == 1:
         for ty in tys:
             if matrix and matrix[0]:
-                a = np.array(np.random.random(dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
+                a = np.array(np.random.random(
+                    dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
             else:
                 a = np.array(np.random.random(dims[0]), dtype=ty)
-            c = fn(a)
+
+            with dpctl.device_context("opencl:gpu"):
+                c = fn(a)
+
             d = test_fn(a)
             if np_all:
                 max_abs_err = np.all(c - d)
@@ -29,15 +35,19 @@ def test_for_different_datatypes(fn, test_fn, dims, arg_count, tys, np_all=False
     elif arg_count == 2:
         for ty in tys:
             if matrix and matrix[0]:
-                a = np.array(np.random.random(dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
+                a = np.array(np.random.random(
+                    dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
             else:
                 a = np.array(np.random.random(dims[0] * dims[1]), dtype=ty)
             if matrix and matrix[1]:
-                b = np.array(np.random.random(dims[2] * dims[3]), dtype=ty).reshape(dims[2], dims[3])
+                b = np.array(np.random.random(
+                    dims[2] * dims[3]), dtype=ty).reshape(dims[2], dims[3])
             else:
                 b = np.array(np.random.random(dims[2] * dims[3]), dtype=ty)
-            c = fn(a, b)
+            with dpctl.device_context("opencl:gpu"):
+                c = fn(a, b)
+
             d = test_fn(a, b)
             if np_all:
                 max_abs_err = np.sum(c - d)
@@ -48,6 +58,7 @@
     return True
 
+
 def test_for_dimensions(fn, test_fn, dims, tys, np_all=False):
     total_size = 1
     for d in dims:
@@ -55,7 +66,10 @@ def test_for_dimensions(fn, test_fn, dims, tys, np_all=False):
 
     for ty in tys:
         a = np.array(np.random.random(total_size), dtype=ty).reshape(dims)
-        c = fn(a)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = fn(a)
+
         d = test_fn(a)
         if np_all:
             max_abs_err = np.all(c - d)
@@ -66,6 +80,7 @@
     return True
 
+
 def ensure_dpnp():
     try:
         # import dpnp
         from numba_dppy.dpnp_glue import dpnp_fptr_interface as dpnp_glue
@@ -75,8 +90,9 @@ def ensure_dpnp():
         return True
     except:
         return False
 
-@unittest.skipUnless(ensure_dpnp(), 'test only when dpNP is available')
+@unittest.skipUnless(ensure_dpnp() and dpctl.has_gpu_queues(), 'test only when dpNP and GPU are available')
 class Testdpnp_functions(DPPYTestCase):
+
     N = 10
 
     a = np.array(np.random.random(N), dtype=np.float32)
@@ -84,123 +100,140 @@ class Testdpnp_functions(DPPYTestCase):
     tys = [np.int32, np.uint32, np.int64, np.uint64, np.float, np.double]
 
     def test_sum(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sum(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.sum, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.sum, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.sum, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.sum, [10, 2, 3], self.tys))
 
     def test_prod(self):
-        @njit(parallel={'offload':True})
+        @njit
        def f(a):
             c = np.prod(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.prod, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.prod, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.prod, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.prod, [10, 2, 3], self.tys))
 
     def test_argmax(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.argmax(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.argmax, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.argmax, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.argmax, [10, 2], self.tys))
-        self.assertTrue(test_for_dimensions(f, np.argmax, [10, 2, 3], self.tys))
+        self.assertTrue(test_for_dimensions(
+            f, np.argmax, [10, 2, 3], self.tys))
 
     def test_max(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.max(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.max, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.max, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.max, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.max, [10, 2, 3], self.tys))
 
     def test_argmin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.argmin(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.argmin, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.argmin, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.argmin, [10, 2], self.tys))
-        self.assertTrue(test_for_dimensions(f, np.argmin, [10, 2, 3], self.tys))
+        self.assertTrue(test_for_dimensions(
+            f, np.argmin, [10, 2, 3], self.tys))
 
     def test_min(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.min(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.min, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.min, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.min, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.min, [10, 2, 3], self.tys))
 
     def test_argsort(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.argsort(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.argmin, [10], 1, self.tys, np_all=True))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.argmin, [10], 1, self.tys, np_all=True))
 
     def test_median(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.median(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.median, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.median, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.median, [10, 2], self.tys))
-        self.assertTrue(test_for_dimensions(f, np.median, [10, 2, 3], self.tys))
+        self.assertTrue(test_for_dimensions(
+            f, np.median, [10, 2, 3], self.tys))
 
     def test_mean(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.mean(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.mean, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.mean, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.mean, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.mean, [10, 2, 3], self.tys))
 
     def test_matmul(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.matmul(a, b)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.matmul, [10, 5, 5, 10], 2, [np.float, np.double], np_all=True, matrix=[True, True]))
+        self.assertTrue(test_for_different_datatypes(f, np.matmul, [10, 5, 5, 10], 2, [
+            np.float, np.double], np_all=True, matrix=[True, True]))
 
     def test_dot(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.dot(a, b)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 1, 10, 1], 2, [np.float, np.double]))
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 1, 10, 2], 2, [np.float, np.double], matrix=[False, True], np_all=True))
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [2, 10, 10, 1], 2, [np.float, np.double], matrix=[True, False], np_all=True))
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 2, 2, 10], 2, [np.float, np.double], matrix=[True, True], np_all=True))
-
+        self.assertTrue(test_for_different_datatypes(
+            f, np.dot, [10, 1, 10, 1], 2, [np.float, np.double]))
+        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 1, 10, 2], 2, [
+            np.float, np.double], matrix=[False, True], np_all=True))
+        self.assertTrue(test_for_different_datatypes(f, np.dot, [2, 10, 10, 1], 2, [
+            np.float, np.double], matrix=[True, False], np_all=True))
+        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 2, 2, 10], 2, [
+            np.float, np.double], matrix=[True, True], np_all=True))
 
     def test_cov(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.cov(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.cov, [10, 7], 1, self.tys, matrix=[True], np_all=True))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.cov, [10, 7], 1, self.tys, matrix=[True], np_all=True))
 
     def test_dpnp_interacting_with_parfor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.sum(a)
             e = np.add(b, a)
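
Note the new guard on the class: the suite is skipped unless dpnp is importable and dpctl reports at least one GPU queue. The same gating idiom works for any device-dependent test; a sketch under the assumption that dpctl.has_gpu_queues() is available, as in the diff above (the class and method names here are hypothetical):

import unittest
import dpctl

@unittest.skipUnless(dpctl.has_gpu_queues(),
                     'test only when a GPU queue is available')
class MyDeviceTest(unittest.TestCase):
    def test_something(self):
        with dpctl.device_context("opencl:gpu"):
            pass  # run the offloaded code under test here
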
diff --git a/numba_dppy/tests/test_dppl_fallback.py b/numba_dppy/tests/test_dppl_fallback.py
index 8519f4fb14..76792f5744 100644
--- a/numba_dppy/tests/test_dppl_fallback.py
+++ b/numba_dppy/tests/test_dppl_fallback.py
@@ -3,7 +3,9 @@
 import numpy as np
 import numba
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 from numba.tests.support import captured_stderr
@@ -28,28 +30,30 @@ def inner_call_fallback():
 
             return a
 
-        with captured_stderr() as msg:
-            dppy = numba.njit(parallel={'offload':True})(inner_call_fallback)
+        with captured_stderr() as msg, dpctl.device_context("opencl:gpu"):
+            dppy = numba.njit(inner_call_fallback)
             dppy_result = dppy()
 
         ref_result = inner_call_fallback()
 
         np.testing.assert_array_equal(dppy_result, ref_result)
-        self.assertTrue('Failed to lower parfor on DPPY-device' in msg.getvalue())
+        self.assertTrue(
+            'Failed to lower parfor on DPPY-device' in msg.getvalue())
 
     def test_dppy_fallback_reductions(self):
         def reduction(a):
             return np.amax(a)
 
         a = np.ones(10)
-        with captured_stderr() as msg:
-            dppy = numba.njit(parallel={'offload':True})(reduction)
+        with captured_stderr() as msg, dpctl.device_context("opencl:gpu"):
+            dppy = numba.njit(reduction)
             dppy_result = dppy(a)
 
         ref_result = reduction(a)
 
         np.testing.assert_array_equal(dppy_result, ref_result)
-        self.assertTrue('Failed to lower parfor on DPPY-device' in msg.getvalue())
+        self.assertTrue(
+            'Failed to lower parfor on DPPY-device' in msg.getvalue())
 
 
 if __name__ == '__main__':
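
The fallback tests above also show the combined context-manager form: stderr capture and the device context are entered in a single with statement, and the test asserts both that results match the pure-Python reference and that the fallback warning was emitted. A condensed sketch of that pattern, using only names that appear in the diff (captured_stderr comes from numba.tests.support):

import numpy as np
import numba
import dpctl
from numba.tests.support import captured_stderr

def reduction(a):
    return np.amax(a)

a = np.ones(10)
with captured_stderr() as msg, dpctl.device_context("opencl:gpu"):
    jitted = numba.njit(reduction)
    result = jitted(a)

# The reduction parfor cannot be lowered for the device, so numba-dppy
# falls back to the CPU pipeline and warns on stderr.
assert 'Failed to lower parfor on DPPY-device' in msg.getvalue()
np.testing.assert_array_equal(result, reduction(a))
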
diff --git a/numba_dppy/tests/test_numpy_bit_twiddling_functions.py b/numba_dppy/tests/test_numpy_bit_twiddling_functions.py
index de6b7bc963..8d022a0bb1 100644
--- a/numba_dppy/tests/test_numpy_bit_twiddling_functions.py
+++ b/numba_dppy/tests/test_numpy_bit_twiddling_functions.py
@@ -5,104 +5,115 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_bit_twiddling_functions(DPPYTestCase):
     def test_bitwise_and(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.bitwise_and(a, b)
             return c
 
-        a = np.array([2,5,255])
-        b = np.array([3,14,16])
+        a = np.array([2, 5, 255])
+        b = np.array([3, 14, 16])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.bitwise_and(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_bitwise_or(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.bitwise_or(a, b)
             return c
 
-        a = np.array([2,5,255])
-        b = np.array([4,4,4])
+        a = np.array([2, 5, 255])
+        b = np.array([4, 4, 4])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.bitwise_or(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_bitwise_xor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.bitwise_xor(a, b)
             return c
 
-        a = np.array([2,5,255])
-        b = np.array([4,4,4])
+        a = np.array([2, 5, 255])
+        b = np.array([4, 4, 4])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.bitwise_xor(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_bitwise_not(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.bitwise_not(a)
             return c
 
-        a = np.array([2,5,255])
+        a = np.array([2, 5, 255])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a)
 
-        c = f(a)
         d = np.bitwise_not(a)
         self.assertTrue(np.all(c == d))
 
-
     def test_invert(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.invert(a)
             return c
 
-        a = np.array([2,5,255])
+        a = np.array([2, 5, 255])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a)
 
-        c = f(a)
         d = np.invert(a)
         self.assertTrue(np.all(c == d))
 
-
     def test_left_shift(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.left_shift(a, b)
             return c
 
-        a = np.array([2,3,4])
-        b = np.array([1,2,3])
+        a = np.array([2, 3, 4])
+        b = np.array([1, 2, 3])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.left_shift(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_right_shift(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.right_shift(a, b)
             return c
 
-        a = np.array([2,3,4])
-        b = np.array([1,2,3])
+        a = np.array([2, 3, 4])
+        b = np.array([1, 2, 3])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.right_shift(a, b)
         self.assertTrue(np.all(c == d))
diff --git a/numba_dppy/tests/test_numpy_comparison_functions.py b/numba_dppy/tests/test_numpy_comparison_functions.py
index 5daf1fc813..53a8eed890 100644
--- a/numba_dppy/tests/test_numpy_comparison_functions.py
+++ b/numba_dppy/tests/test_numpy_comparison_functions.py
@@ -5,81 +5,92 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_comparison_functions(DPPYTestCase):
-    a = np.array([4,5,6])
-    b = np.array([2,6,6])
+    a = np.array([4, 5, 6])
+    b = np.array([2, 6, 6])
+
     def test_greater(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.greater(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.greater(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_greater_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.greater_equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.greater_equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_less(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.less(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.less(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_less_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.less_equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.less_equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_not_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.not_equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.not_equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_and(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.logical_and(a, b)
             return c
@@ -87,13 +98,14 @@ def f(a, b):
         a = np.array([True, True, False])
         b = np.array([True, False, False])
 
-        c = f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
+
         d = np.logical_and(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_or(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.logical_or(a, b)
             return c
@@ -101,13 +113,14 @@ def f(a, b):
         a = np.array([True, True, False])
         b = np.array([True, False, False])
 
-        c = f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
+
         d = np.logical_or(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_xor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.logical_xor(a, b)
             return c
@@ -115,76 +128,83 @@ def f(a, b):
         a = np.array([True, True, False])
         b = np.array([True, False, False])
 
-        c = f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
+
         d = np.logical_xor(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_not(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.logical_not(a)
             return c
 
         a = np.array([True, True, False])
 
-        c = f(a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a)
+
         d = np.logical_not(a)
         self.assertTrue(np.all(c == d))
 
-
     def test_maximum(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.maximum(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.maximum(a, b)
         np.testing.assert_equal(c, d)
 
-
     def test_minimum(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.minimum(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.minimum(a, b)
         np.testing.assert_equal(c, d)
 
-
     def test_fmax(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.fmax(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.fmax(a, b)
         np.testing.assert_equal(c, d)
 
-
     def test_fmin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.fmin(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.fmin(a, b)
         np.testing.assert_equal(c, d)
diff --git a/numba_dppy/tests/test_numpy_floating_functions.py b/numba_dppy/tests/test_numpy_floating_functions.py
index c05c10498d..fb7c1b98e8 100644
--- a/numba_dppy/tests/test_numpy_floating_functions.py
+++ b/numba_dppy/tests/test_numpy_floating_functions.py
@@ -4,92 +4,102 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_floating_functions(DPPYTestCase):
     def test_isfinite(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.isfinite(a)
             return c
 
-        test_arr = [np.log(-1.),1.,np.log(0)]
+        test_arr = [np.log(-1.), 1., np.log(0)]
         input_arr = np.asarray(test_arr, dtype=np.float32)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.isfinite(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_isinf(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.isinf(a)
             return c
 
-        test_arr = [np.log(-1.),1.,np.log(0)]
+        test_arr = [np.log(-1.), 1., np.log(0)]
         input_arr = np.asarray(test_arr, dtype=np.float32)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.isinf(input_arr)
         self.assertTrue(np.all(c == d))
 
     def test_isnan(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.isnan(a)
             return c
 
-        test_arr = [np.log(-1.),1.,np.log(0)]
+        test_arr = [np.log(-1.), 1., np.log(0)]
         input_arr = np.asarray(test_arr, dtype=np.float32)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.isnan(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_floor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.floor(a)
             return c
 
         input_arr = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.floor(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_ceil(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.ceil(a)
             return c
 
         input_arr = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.ceil(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_trunc(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.trunc(a)
             return c
 
         input_arr = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.trunc(input_arr)
         self.assertTrue(np.all(c == d))
 
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/numba_dppy/tests/test_numpy_math_functions.py b/numba_dppy/tests/test_numpy_math_functions.py
index 155b352c7e..7af014d4d8 100644
--- a/numba_dppy/tests/test_numpy_math_functions.py
+++ b/numba_dppy/tests/test_numpy_math_functions.py
@@ -5,79 +5,95 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_math_functions(DPPYTestCase):
+
     N = 10
 
     a = np.array(np.random.random(N), dtype=np.float32)
     b = np.array(np.random.random(N), dtype=np.float32)
 
     def test_add(self):
-        @njit(parallel={'offload':True})
+        @njit
        def f(a, b):
             c = np.add(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a + self.b
         self.assertTrue(np.all(c == d))
 
     def test_subtract(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.subtract(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a - self.b
         self.assertTrue(np.all(c == d))
 
     def test_multiply(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.multiply(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a * self.b
         self.assertTrue(np.all(c == d))
 
     def test_divide(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.divide(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a / self.b
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_true_divide(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.true_divide(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.true_divide(self.a, self.b)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_negative(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.negative(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         self.assertTrue(np.all(c == -self.a))
 
     def test_power(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.power(a, b)
             return c
@@ -85,11 +101,13 @@ def f(a, b):
         input_arr = np.random.randint(self.N, size=(self.N))
         exp = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, exp)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, exp)
+
         self.assertTrue(np.all(c == input_arr * input_arr))
 
     def test_remainder(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.remainder(a, b)
             return c
@@ -97,11 +115,13 @@ def f(a, b):
         input_arr = np.full((self.N), 3, dtype=np.int)
         divisor = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, divisor)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, divisor)
+
         self.assertTrue(np.all(c == 1))
 
     def test_mod(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.mod(a, b)
             return c
@@ -109,11 +129,13 @@ def f(a, b):
         input_arr = np.full((self.N), 3, dtype=np.int)
         divisor = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, divisor)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, divisor)
+
         self.assertTrue(np.all(c == 1))
 
     def test_fmod(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.fmod(a, b)
             return c
@@ -121,173 +143,201 @@ def f(a, b):
         input_arr = np.full((self.N), 3, dtype=np.float32)
         divisor = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, divisor)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, divisor)
+
         self.assertTrue(np.all(c == 1.))
 
     def test_abs(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.abs(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         self.assertTrue(np.all(c == -input_arr))
 
     def test_absolute(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.absolute(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
-        self.assertTrue(np.all(c == -input_arr))
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+            self.assertTrue(np.all(c == -input_arr))
 
     def test_fabs(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.fabs(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
-        self.assertTrue(np.all(c == -input_arr))
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+            self.assertTrue(np.all(c == -input_arr))
 
     def test_sign(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sign(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         self.assertTrue(np.all(c == -1.))
 
     def test_conj(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.conj(a)
             return c
 
         input_arr = np.eye(self.N) + 1j * np.eye(self.N)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.conj(input_arr)
         self.assertTrue(np.all(c == d))
 
     def test_exp(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.exp(a)
             return c
 
         input_arr = np.random.randint(self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.exp(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_log(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.log(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.log(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_log10(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.log10(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.log10(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_expm1(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.expm1(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.expm1(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_log1p(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
            c = np.log1p(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.log1p(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
     def test_sqrt(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sqrt(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.sqrt(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_square(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.square(a)
             return c
 
         input_arr = np.random.randint(self.N, size=(self.N))
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         self.assertTrue(np.all(c == input_arr * input_arr))
 
     def test_reciprocal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.reciprocal(a)
             return c
 
-        input_arr = 5 * np.random.random_sample(self.N) + 5
+        input_arr = 5 * np.random.random_sample(self.N) + 5
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
 
-        c = f(input_arr)
         self.assertTrue(np.all(c == 1/input_arr))
 
     def test_conjugate(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.conjugate(a)
             return c
 
         input_arr = np.eye(self.N) + 1j * np.eye(self.N)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.conj(input_arr)
         self.assertTrue(np.all(c == d))
diff --git a/numba_dppy/tests/test_numpy_trigonomteric_functions.py b/numba_dppy/tests/test_numpy_trigonomteric_functions.py
index 7ce18b870a..361273cdee 100644
--- a/numba_dppy/tests/test_numpy_trigonomteric_functions.py
+++ b/numba_dppy/tests/test_numpy_trigonomteric_functions.py
@@ -5,214 +5,239 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_math_functions(DPPYTestCase):
+
     N = 10
 
     a = np.array(np.random.random(N), dtype=np.float32)
     b = np.array(np.random.random(N), dtype=np.float32)
 
     def test_sin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sin(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.sin(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_cos(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.cos(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.cos(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_tan(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.tan(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.tan(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arcsin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arcsin(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arcsin(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arccos(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arccos(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arccos(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
    def test_arctan(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arctan(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arctan(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arctan2(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.arctan2(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.arctan2(self.a, self.b)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_sinh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sinh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.sinh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_cosh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.cosh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.cosh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_tanh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.tanh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.tanh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arcsinh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arcsinh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arcsinh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arccosh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arccosh(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.arccosh(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arctanh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arctanh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arctanh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_deg2rad(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.deg2rad(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.deg2rad(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_rad2deg(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.rad2deg(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.rad2deg(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_degrees(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.degrees(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.degrees(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_radians(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.radians(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.radians(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
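
The remaining diffs cover prange loops. Under the new scheme a prange loop offloads exactly like an array expression: decorate with plain @njit and call inside a device context. A minimal sketch, not part of the patch, mirroring the pattern the prange tests below use (preallocated arrays are passed in, which sidesteps the NRT limitation one of those tests is skipped for):

import numpy as np
import dpctl
from numba import njit, prange

@njit
def scale(a, b):
    # Each iteration is independent, so the loop can be lowered
    # as a parfor for the active device.
    for i in prange(a.shape[0]):
        b[i] = a[i] * 10

a = np.ones(64)
b = np.ones(64)
with dpctl.device_context("opencl:gpu"):
    scale(a, b)
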
diff --git a/numba_dppy/tests/test_parfor_lower_message.py b/numba_dppy/tests/test_parfor_lower_message.py
index 591fd2cb0e..9f4660e01f 100644
--- a/numba_dppy/tests/test_parfor_lower_message.py
+++ b/numba_dppy/tests/test_parfor_lower_message.py
@@ -1,8 +1,10 @@
 import numpy as np
 import numba
 from numba import njit, prange
-import numba_dppy, numba_dppy as dppy
-from numba_dppy.testing import unittest, DPPYTestCase
+import numba_dppy
+import numba_dppy as dppy
+from numba_dppy.testing import unittest
+from numba_dppy.testing import DPPYTestCase
 from numba.tests.support import captured_stdout
 import dpctl
 
@@ -23,7 +25,7 @@ class TestParforMessage(DPPYTestCase):
     def test_parfor_message(self):
         with dpctl.device_context("opencl:gpu") as gpu_queue:
             numba_dppy.compiler.DEBUG = 1
-            jitted = njit(parallel={"offload": True})(prange_example)
+            jitted = njit(prange_example)
 
             with captured_stdout() as got:
                 jitted()
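
test_parfor_message above, and the prange tests that follow, verify offload by counting compiler diagnostics: with numba_dppy.compiler.DEBUG set, each parfor lowered for the device prints 'Parfor lowered on DPPY-device'. A hedged sketch of that verification idiom, using only names that appear in these diffs (the scale function is the hypothetical example from the previous note):

import numpy as np
import numba_dppy
import dpctl
from numba import njit, prange
from numba.tests.support import captured_stdout

@njit
def scale(a, b):
    for i in prange(a.shape[0]):
        b[i] = a[i] * 2.0

a = np.ones(1024)
b = np.zeros(1024)

old_debug = numba_dppy.compiler.DEBUG
numba_dppy.compiler.DEBUG = 1
try:
    with captured_stdout() as stdout, dpctl.device_context("opencl:gpu"):
        scale(a, b)
finally:
    numba_dppy.compiler.DEBUG = old_debug

# One diagnostic line per parfor that was actually offloaded.
assert stdout.getvalue().count('Parfor lowered on DPPY-device') >= 1
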
diff --git a/numba_dppy/tests/test_prange.py b/numba_dppy/tests/test_prange.py
index f4c13c4b1f..3a8948d716 100644
--- a/numba_dppy/tests/test_prange.py
+++ b/numba_dppy/tests/test_prange.py
@@ -5,16 +5,19 @@
 import sys
 import numpy as np
 import numba
+import dpctl
 from numba import njit, prange
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
 from numba_dppy.testing import unittest, expectedFailureIf
 from numba_dppy.testing import DPPYTestCase
 from numba.tests.support import captured_stdout
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), "test only on GPU system")
 class TestPrange(DPPYTestCase):
     def test_one_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             for i in prange(4):
                 b[i, 0] = a[i, 0] * 10
@@ -24,14 +27,14 @@ def f(a, b):
         a = np.ones((m, n))
         b = np.ones((m, n))
 
-        f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
 
         for i in range(4):
             self.assertTrue(b[i, 0] == a[i, 0] * 10)
 
-
     def test_nested_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             # dimensions must be provided as scalar
             m, n = a.shape
@@ -44,12 +47,13 @@ def f(a, b):
         a = np.ones((m, n))
         b = np.ones((m, n))
 
-        f(a, b)
-        self.assertTrue(np.all(b == 10))
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
+            self.assertTrue(np.all(b == 10))
 
     def test_multiple_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             # dimensions must be provided as scalar
             m, n = a.shape
@@ -58,7 +62,6 @@ def f(a, b):
             for j in prange(n):
                 b[i, j] = a[i, j] * val
 
-
             for i in prange(m):
                 for j in prange(n):
                     a[i, j] = a[i, j] * 10
@@ -68,13 +71,14 @@ def f(a, b):
         a = np.ones((m, n))
         b = np.ones((m, n))
 
-        f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
+
         self.assertTrue(np.all(b == 10))
         self.assertTrue(np.all(a == 10))
 
-
     def test_three_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             # dimensions must be provided as scalar
             m, n, o = a.shape
@@ -91,9 +95,10 @@ def f(a, b):
         a = np.ones((m, n, o))
         b = np.ones((m, n, o))
 
-        f(a, b)
-        self.assertTrue(np.all(b == 12))
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
+            self.assertTrue(np.all(b == 12))
 
     @expectedFailureIf(sys.platform.startswith('win'))
     def test_two_consequent_prange(self):
@@ -110,19 +115,21 @@ def prange_example():
         old_debug = numba_dppy.compiler.DEBUG
         numba_dppy.compiler.DEBUG = 1
 
-        jitted = njit(parallel={'offload':True})(prange_example)
-        with captured_stdout() as stdout:
+        jitted = njit(prange_example)
+
+        with captured_stdout() as stdout, dpctl.device_context("opencl:gpu"):
             jitted_res = jitted()
 
         res = prange_example()
 
         numba_dppy.compiler.DEBUG = old_debug
 
-        self.assertEqual(stdout.getvalue().count('Parfor lowered on DPPY-device'), 2, stdout.getvalue())
-        self.assertEqual(stdout.getvalue().count('Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Parfor lowered on DPPY-device'), 2, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
 
         np.testing.assert_equal(res, jitted_res)
 
-
     @unittest.skip('NRT required but not enabled')
     def test_2d_arrays(self):
         def prange_example():
@@ -138,16 +145,19 @@ def prange_example():
         old_debug = numba_dppy.compiler.DEBUG
         numba_dppy.compiler.DEBUG = 1
 
-        jitted = njit(parallel={'offload':True})(prange_example)
-        with captured_stdout() as stdout:
+        jitted = njit(prange_example)
+
+        with captured_stdout() as stdout, dpctl.device_context("opencl:gpu"):
             jitted_res = jitted()
 
         res = prange_example()
 
         numba_dppy.compiler.DEBUG = old_debug
 
-        self.assertEqual(stdout.getvalue().count('Parfor lowered on DPPY-device'), 2, stdout.getvalue())
-        self.assertEqual(stdout.getvalue().count('Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Parfor lowered on DPPY-device'), 2, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
 
         np.testing.assert_equal(res, jitted_res)
diff --git a/numba_dppy/tests/test_vectorize.py b/numba_dppy/tests/test_vectorize.py
index 04891ca296..2fed0fc65f 100644
--- a/numba_dppy/tests/test_vectorize.py
+++ b/numba_dppy/tests/test_vectorize.py
@@ -5,11 +5,14 @@
 import sys
 import numpy as np
 from numba import njit, vectorize
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), "test only on GPU system")
 class TestVectorize(DPPYTestCase):
     def test_vectorize(self):
 
@@ -17,9 +20,9 @@ def test_vectorize(self):
         def axy(a, x, y):
             return a * x + y
 
-        @njit(parallel={'offload':True})
+        @njit
         def f(a0, a1):
-            return np.cos(axy(a0, np.sin(a1) - 1., 1.) )
+            return np.cos(axy(a0, np.sin(a1) - 1., 1.))
 
         def f_np(a0, a1):
             sin_res = np.sin(a1)
@@ -28,11 +31,12 @@ def f_np(a0, a1):
                 res.append(axy(a0[i], sin_res[i] - 1., 1.))
             return np.cos(np.array(res))
 
-
         A = np.random.random(10)
         B = np.random.random(10)
 
-        expected = f(A, B)
+        with dpctl.device_context("opencl:gpu"):
+            expected = f(A, B)
+
         actual = f_np(A, B)
 
         max_abs_err = expected.sum() - actual.sum()
diff --git a/numba_dppy/tests/test_with_context.py b/numba_dppy/tests/test_with_context.py
index e025a77784..1f733829b6 100644
--- a/numba_dppy/tests/test_with_context.py
+++ b/numba_dppy/tests/test_with_context.py
@@ -12,7 +12,6 @@ class TestWithDPPYContext(DPPYTestCase):
 
     @unittest.skipIf(not dpctl.has_gpu_queues(), "No GPU platforms available")
-    @expectedFailureIf(sys.platform.startswith('win'))
     def test_with_dppy_context_gpu(self):
 
         @njit
@@ -39,7 +38,6 @@ def func(b):
         self.assertTrue('Parfor lowered on DPPY-device' in got_gpu_message.getvalue())
 
     @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available")
-    @unittest.expectedFailure
     def test_with_dppy_context_cpu(self):
 
         @njit