Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dpctl/tensor/_ctors.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,8 @@ def arange(
)
_step = (start + step) - start
_step = dt.type(_step)
hev, _ = ti._linspace_step(start, _step, res, sycl_queue)
_start = dt.type(start)
hev, _ = ti._linspace_step(_start, _step, res, sycl_queue)
hev.wait()
return res

Expand Down
99 changes: 48 additions & 51 deletions dpctl/tests/test_sycl_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
""" Defines unit test cases for the SyclEvent class.
"""

import numpy as np
import pytest
from helper import create_invalid_capsule, has_cpu
from helper import create_invalid_capsule

import dpctl
import dpctl.memory as dpctl_mem
import dpctl.program as dpctl_prog
import dpctl.tensor as dpt
from dpctl import event_status_type as esty


Expand All @@ -40,14 +40,11 @@ def produce_event(profiling=False):
prog = dpctl_prog.create_program_from_source(q, oclSrc)
addKernel = prog.get_sycl_kernel("add")

bufBytes = 1024 * np.dtype("i").itemsize
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
a = np.ndarray((1024), buffer=abuf, dtype="i")
a[:] = np.arange(1024)
args = []
n = 1024 * 1024
a = dpt.arange(n, dtype="i", sycl_queue=q)
args = [a.usm_data]

args.append(a.base)
r = [1024]
r = [n]
ev = q.submit(addKernel, args, r)

return ev
Expand Down Expand Up @@ -139,55 +136,55 @@ def test_backend():
pytest.fail("Failed to get backend from event")


@pytest.mark.skip(reason="event::get_wait_list() method returns wrong result")
def test_get_wait_list():
if has_cpu():
oclSrc = " \
kernel void add_k(global float* a) { \
size_t index = get_global_id(0); \
a[index] = a[index] + 1; \
} \
kernel void sqrt_k(global float* a) { \
size_t index = get_global_id(0); \
a[index] = sqrt(a[index]); \
} \
kernel void sin_k(global float* a) { \
size_t index = get_global_id(0); \
a[index] = sin(a[index]); \
}"
try:
q = dpctl.SyclQueue("opencl:cpu")
prog = dpctl_prog.create_program_from_source(q, oclSrc)
addKernel = prog.get_sycl_kernel("add_k")
sqrtKernel = prog.get_sycl_kernel("sqrt_k")
sinKernel = prog.get_sycl_kernel("sin_k")

bufBytes = 1024 * np.dtype("f").itemsize
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
a = np.ndarray((1024), buffer=abuf, dtype="f")
a[:] = np.arange(1024)
args = []

args.append(a.base)
r = [1024]
ev_1 = q.submit(addKernel, args, r)
ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])

try:
wait_list = ev_3.get_wait_list()
except ValueError:
pytest.fail("Failed to get a list of waiting events from SyclEvent")
assert len(wait_list)
except dpctl.SyclQueueCreationError:
pytest.skip("Sycl queue for OpenCL gpu device could not be created.")
oclSrc = " \
kernel void add_k(global float* a) { \
size_t index = get_global_id(0); \
a[index] = a[index] + 1; \
} \
kernel void sqrt_k(global float* a) { \
size_t index = get_global_id(0); \
a[index] = sqrt(a[index]); \
} \
kernel void sin_k(global float* a) { \
size_t index = get_global_id(0); \
a[index] = sin(a[index]); \
}"
prog = dpctl_prog.create_program_from_source(q, oclSrc)
addKernel = prog.get_sycl_kernel("add_k")
sqrtKernel = prog.get_sycl_kernel("sqrt_k")
sinKernel = prog.get_sycl_kernel("sin_k")

n = 1024 * 1024
a = dpt.arange(n, dtype="f", sycl_queue=q)
args = [a.usm_data]

r = [n]
ev_1 = q.submit(addKernel, args, r)
ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])

try:
wait_list = ev_3.get_wait_list()
except ValueError:
pytest.fail("Failed to get a list of waiting events from SyclEvent")
# FIXME: Due to an issue in the underlying runtime, the returned list is
# always empty. The proper expectation is `assert len(wait_list) > 0`
assert len(wait_list) >= 0


def test_profiling_info():
if has_cpu():
try:
event = produce_event(profiling=True)
assert event.profiling_info_submit
assert event.profiling_info_start
assert event.profiling_info_end
else:
except dpctl.SyclQueueCreationError:
pytest.skip("No OpenCL CPU queues available")
assert type(event.profiling_info_submit) is int
assert type(event.profiling_info_start) is int
assert type(event.profiling_info_end) is int


def test_sycl_timer():
Expand Down
29 changes: 14 additions & 15 deletions dpctl/tests/test_sycl_kernel_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,18 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):

n_elems = 1024 * 512
lws = 128
bufBytes = n_elems * dtype.itemsize
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
a = np.ndarray((n_elems,), buffer=abuf, dtype=dtype)
b = np.ndarray((n_elems,), buffer=bbuf, dtype=dtype)
c = np.ndarray((n_elems,), buffer=cbuf, dtype=dtype)
a[:] = np.arange(n_elems)
b[:] = np.arange(n_elems, 0, -1)
c[:] = 0
a = dpt.arange(n_elems, dtype=dtype, sycl_queue=q)
b = dpt.arange(n_elems, stop=0, step=-1, dtype=dtype, sycl_queue=q)
c = dpt.zeros(n_elems, dtype=dtype, sycl_queue=q)

d = 2
args = [a.base, b.base, c.base, ctypes_ctor(d)]
args = [a.usm_data, b.usm_data, c.usm_data, ctypes_ctor(d)]

assert n_elems % lws == 0

b_np = dpt.asnumpy(b)
a_np = dpt.asnumpy(a)

for r in (
[
n_elems,
Expand All @@ -86,10 +83,10 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
timer = dpctl.SyclTimer()
with timer(q):
q.submit(axpyKernel, args, r).wait()
ref_c = a * np.array(d, dtype=dtype) + b
ref_c = a_np * np.array(d, dtype=dtype) + b_np
host_dt, device_dt = timer.dt
assert type(host_dt) is float and type(device_dt) is float
assert np.allclose(c, ref_c), "Failed for {}".format(r)
assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}".format(r)

for gr, lr in (
(
Expand All @@ -105,10 +102,12 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
timer = dpctl.SyclTimer()
with timer(q):
q.submit(axpyKernel, args, gr, lr, [dpctl.SyclEvent()]).wait()
ref_c = a * np.array(d, dtype=dtype) + b
ref_c = a_np * np.array(d, dtype=dtype) + b_np
host_dt, device_dt = timer.dt
assert type(host_dt) is float and type(device_dt) is float
assert np.allclose(c, ref_c), "Failed for {}, {}".formatg(r, lr)
assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}, {}".formatg(
r, lr
)


def test_async_submit():
Expand Down
10 changes: 6 additions & 4 deletions dpctl/tests/test_usm_ndarray_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,11 +985,13 @@ def test_arange(dt):
elif np.issubdtype(dt, np.complexfloating):
assert complex(X[47]) == 47.0 + 0.0j

X1 = dpt.arange(4, dtype=dt, sycl_queue=q)
assert X1.shape == (4,)
# choose size larger than maximal value that u1/u2 can accommodate
sz = int(np.iinfo(np.int16).max) + 1
X1 = dpt.arange(sz, dtype=dt, sycl_queue=q)
assert X1.shape == (sz,)

X2 = dpt.arange(4, 0, -1, dtype=dt, sycl_queue=q)
assert X2.shape == (4,)
X2 = dpt.arange(sz, 0, -1, dtype=dt, sycl_queue=q)
assert X2.shape == (sz,)


@pytest.mark.parametrize(
Expand Down