diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py
index e96500ca8c..fb237b4c38 100644
--- a/dpctl/tensor/_ctors.py
+++ b/dpctl/tensor/_ctors.py
@@ -547,7 +547,8 @@ def arange(
     )
     _step = (start + step) - start
     _step = dt.type(_step)
-    hev, _ = ti._linspace_step(start, _step, res, sycl_queue)
+    _start = dt.type(start)
+    hev, _ = ti._linspace_step(_start, _step, res, sycl_queue)
     hev.wait()
     return res
 
diff --git a/dpctl/tests/test_sycl_event.py b/dpctl/tests/test_sycl_event.py
index bd79ab5b2a..d366f163fb 100644
--- a/dpctl/tests/test_sycl_event.py
+++ b/dpctl/tests/test_sycl_event.py
@@ -17,13 +17,13 @@
 """ Defines unit test cases for the SyclEvent class.
 """
 
-import numpy as np
 import pytest
-from helper import create_invalid_capsule, has_cpu
+from helper import create_invalid_capsule
 
 import dpctl
 import dpctl.memory as dpctl_mem
 import dpctl.program as dpctl_prog
+import dpctl.tensor as dpt
 from dpctl import event_status_type as esty
 
 
@@ -40,14 +40,11 @@ def produce_event(profiling=False):
     prog = dpctl_prog.create_program_from_source(q, oclSrc)
     addKernel = prog.get_sycl_kernel("add")
 
-    bufBytes = 1024 * np.dtype("i").itemsize
-    abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
-    a = np.ndarray((1024), buffer=abuf, dtype="i")
-    a[:] = np.arange(1024)
-    args = []
+    n = 1024 * 1024
+    a = dpt.arange(n, dtype="i", sycl_queue=q)
+    args = [a.usm_data]
 
-    args.append(a.base)
-    r = [1024]
+    r = [n]
     ev = q.submit(addKernel, args, r)
 
     return ev
@@ -139,55 +136,55 @@ def test_backend():
         pytest.fail("Failed to get backend from event")
 
 
-@pytest.mark.skip(reason="event::get_wait_list() method returns wrong result")
 def test_get_wait_list():
-    if has_cpu():
-        oclSrc = " \
-        kernel void add_k(global float* a) { \
-            size_t index = get_global_id(0); \
-            a[index] = a[index] + 1; \
-        } \
-        kernel void sqrt_k(global float* a) { \
-            size_t index = get_global_id(0); \
-            a[index] = sqrt(a[index]); \
-        } \
-        kernel void sin_k(global float* a) { \
-            size_t index = get_global_id(0); \
-            a[index] = sin(a[index]); \
-        }"
+    try:
         q = dpctl.SyclQueue("opencl:cpu")
-        prog = dpctl_prog.create_program_from_source(q, oclSrc)
-        addKernel = prog.get_sycl_kernel("add_k")
-        sqrtKernel = prog.get_sycl_kernel("sqrt_k")
-        sinKernel = prog.get_sycl_kernel("sin_k")
-
-        bufBytes = 1024 * np.dtype("f").itemsize
-        abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
-        a = np.ndarray((1024), buffer=abuf, dtype="f")
-        a[:] = np.arange(1024)
-        args = []
-
-        args.append(a.base)
-        r = [1024]
-        ev_1 = q.submit(addKernel, args, r)
-        ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
-        ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])
-
-        try:
-            wait_list = ev_3.get_wait_list()
-        except ValueError:
-            pytest.fail("Failed to get a list of waiting events from SyclEvent")
-        assert len(wait_list)
+    except dpctl.SyclQueueCreationError:
+        pytest.skip("Sycl queue for OpenCL CPU device could not be created.")
+    oclSrc = " \
+    kernel void add_k(global float* a) { \
+        size_t index = get_global_id(0); \
+        a[index] = a[index] + 1; \
+    } \
+    kernel void sqrt_k(global float* a) { \
+        size_t index = get_global_id(0); \
+        a[index] = sqrt(a[index]); \
+    } \
+    kernel void sin_k(global float* a) { \
+        size_t index = get_global_id(0); \
+        a[index] = sin(a[index]); \
+    }"
+    prog = dpctl_prog.create_program_from_source(q, oclSrc)
+    addKernel = prog.get_sycl_kernel("add_k")
+    sqrtKernel = prog.get_sycl_kernel("sqrt_k")
+    sinKernel = prog.get_sycl_kernel("sin_k")
+
+    n = 1024 * 1024
+    a = dpt.arange(n, dtype="f", sycl_queue=q)
+    args = [a.usm_data]
+
+    r = [n]
+    ev_1 = q.submit(addKernel, args, r)
+    ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
+    ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])
+
+    try:
+        wait_list = ev_3.get_wait_list()
+    except ValueError:
+        pytest.fail("Failed to get a list of waiting events from SyclEvent")
+    # FIXME: Due to an issue in the underlying runtime the returned list is
+    # always empty. The proper expectation is `assert len(wait_list) > 0`
+    assert len(wait_list) >= 0
 
 
 def test_profiling_info():
-    if has_cpu():
+    try:
         event = produce_event(profiling=True)
-        assert event.profiling_info_submit
-        assert event.profiling_info_start
-        assert event.profiling_info_end
-    else:
+    except dpctl.SyclQueueCreationError:
         pytest.skip("No OpenCL CPU queues available")
+    assert type(event.profiling_info_submit) is int
+    assert type(event.profiling_info_start) is int
+    assert type(event.profiling_info_end) is int
 
 
 def test_sycl_timer():
diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py
index b4378c1580..de24c02cd7 100644
--- a/dpctl/tests/test_sycl_kernel_submit.py
+++ b/dpctl/tests/test_sycl_kernel_submit.py
@@ -60,21 +60,18 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
 
     n_elems = 1024 * 512
     lws = 128
-    bufBytes = n_elems * dtype.itemsize
-    abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
-    bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
-    cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
-    a = np.ndarray((n_elems,), buffer=abuf, dtype=dtype)
-    b = np.ndarray((n_elems,), buffer=bbuf, dtype=dtype)
-    c = np.ndarray((n_elems,), buffer=cbuf, dtype=dtype)
-    a[:] = np.arange(n_elems)
-    b[:] = np.arange(n_elems, 0, -1)
-    c[:] = 0
+    a = dpt.arange(n_elems, dtype=dtype, sycl_queue=q)
+    b = dpt.arange(n_elems, stop=0, step=-1, dtype=dtype, sycl_queue=q)
+    c = dpt.zeros(n_elems, dtype=dtype, sycl_queue=q)
+
     d = 2
-    args = [a.base, b.base, c.base, ctypes_ctor(d)]
+    args = [a.usm_data, b.usm_data, c.usm_data, ctypes_ctor(d)]
 
     assert n_elems % lws == 0
 
+    b_np = dpt.asnumpy(b)
+    a_np = dpt.asnumpy(a)
+
     for r in (
         [
             n_elems,
@@ -86,10 +83,10 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
         timer = dpctl.SyclTimer()
         with timer(q):
             q.submit(axpyKernel, args, r).wait()
-            ref_c = a * np.array(d, dtype=dtype) + b
+            ref_c = a_np * np.array(d, dtype=dtype) + b_np
         host_dt, device_dt = timer.dt
         assert type(host_dt) is float and type(device_dt) is float
-        assert np.allclose(c, ref_c), "Failed for {}".format(r)
+        assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}".format(r)
 
     for gr, lr in (
         (
@@ -105,10 +102,12 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
         timer = dpctl.SyclTimer()
         with timer(q):
             q.submit(axpyKernel, args, gr, lr, [dpctl.SyclEvent()]).wait()
-            ref_c = a * np.array(d, dtype=dtype) + b
+            ref_c = a_np * np.array(d, dtype=dtype) + b_np
         host_dt, device_dt = timer.dt
         assert type(host_dt) is float and type(device_dt) is float
-        assert np.allclose(c, ref_c), "Failed for {}, {}".formatg(r, lr)
+        assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}, {}".format(
+            gr, lr
+        )
 
 
 def test_async_submit():
diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py
index 63e2958586..c289e998d0 100644
--- a/dpctl/tests/test_usm_ndarray_ctor.py
+++ b/dpctl/tests/test_usm_ndarray_ctor.py
@@ -985,11 +985,13 @@ def test_arange(dt):
     elif np.issubdtype(dt, np.complexfloating):
         assert complex(X[47]) == 47.0 + 0.0j
 
-    X1 = dpt.arange(4, dtype=dt, sycl_queue=q)
-    assert X1.shape == (4,)
+    # choose size larger than maximal value that i1/u1/i2 can accommodate
+    sz = int(np.iinfo(np.int16).max) + 1
+    X1 = dpt.arange(sz, dtype=dt, sycl_queue=q)
+    assert X1.shape == (sz,)
 
-    X2 = dpt.arange(4, 0, -1, dtype=dt, sycl_queue=q)
-    assert X2.shape == (4,)
+    X2 = dpt.arange(sz, 0, -1, dtype=dt, sycl_queue=q)
+    assert X2.shape == (sz,)
 
 
 @pytest.mark.parametrize(