From 5f4982768322831e3628217fffa7ccf7b7413720 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:07:59 -0500 Subject: [PATCH 1/4] linspace_affine should not use double precision type in kernels if HW does not support it --- dpctl/tensor/libtensor/source/tensor_py.cpp | 28 ++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 4ef68a30a0..b3df82f54b 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -43,7 +43,7 @@ template class copy_cast_from_host_kernel; template class copy_cast_spec_kernel; template class copy_for_reshape_generic_kernel; template class linear_sequence_step_kernel; -template class linear_sequence_affine_kernel; +template class linear_sequence_affine_kernel; static dpctl::tensor::detail::usm_ndarray_types array_types; @@ -1526,7 +1526,7 @@ typedef sycl::event (*lin_space_affine_fn_ptr_t)( static lin_space_affine_fn_ptr_t lin_space_affine_dispatch_vector[_ns::num_types]; -template class LinearSequenceAffineFunctor +template class LinearSequenceAffineFunctor { private: Ty *p = nullptr; @@ -1544,8 +1544,8 @@ template class LinearSequenceAffineFunctor void operator()(sycl::id<1> wiid) const { auto i = wiid.get(0); - double wc = double(i) / n; - double w = double(n - i) / n; + wTy wc = wTy(i) / n; + wTy w = wTy(n - i) / n; if constexpr (is_complex::value) { auto _w = static_cast(w); auto _wc = static_cast(wc); @@ -1578,13 +1578,23 @@ sycl::event lin_space_affine_impl(sycl::queue exec_q, throw; } + bool device_supports_doubles = exec_q.get_device().has(sycl::aspect::fp64); sycl::event lin_space_affine_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - cgh.parallel_for>( - sycl::range<1>{nelems}, - LinearSequenceAffineFunctor(array_data, start_v, end_v, - (include_endpoint) ? 
nelems - 1 - : nelems)); + if (device_supports_doubles) { + cgh.parallel_for>( + sycl::range<1>{nelems}, + LinearSequenceAffineFunctor( + array_data, start_v, end_v, + (include_endpoint) ? nelems - 1 : nelems)); + } + else { + cgh.parallel_for>( + sycl::range<1>{nelems}, + LinearSequenceAffineFunctor( + array_data, start_v, end_v, + (include_endpoint) ? nelems - 1 : nelems)); + } }); return lin_space_affine_event; From 290230aa478d7da2ea4b87f08faf756b8e4fa34f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:08:26 -0500 Subject: [PATCH 2/4] test_linspace should not try double precision if HW does not support it --- dpctl/tests/test_usm_ndarray_ctor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 9b4759ae73..5d52694ca4 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1080,6 +1080,8 @@ def test_linspace(dt): q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Default queue could not be created") + if dt in ["f8", "c16"] and not q.sycl_device.has_aspect_fp64: + pytest.skip("Device does not support double precision") X = dpt.linspace(0, 1, num=2, dtype=dt, sycl_queue=q) assert np.allclose(dpt.asnumpy(X), np.linspace(0, 1, num=2, dtype=dt)) From f6db04e05d64343d0ce23cf37879f715957fba8f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:30:31 -0500 Subject: [PATCH 3/4] Fix for copy of double precision NumPy array to device w/o HW support for DP Routine to copy NumPy array to USM array casts double to single precision on host if sycl device does not support double precision --- dpctl/tensor/_copy_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index ec2c63d604..dd6f068596 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -81,8 +81,15 @@ def 
_copy_from_numpy_into(dst, np_ary): if not isinstance(dst, dpt.usm_ndarray): raise TypeError("Expected usm_ndarray, got {}".format(type(dst))) src_ary = np.broadcast_to(np_ary, dst.shape) + copy_q = dst.sycl_queue + if copy_q.sycl_device.has_aspect_fp64 is False: + src_ary_dt_c = src_ary.dtype.char + if src_ary_dt_c == "d": + src_ary = src_ary.astype(np.float32) + elif src_ary_dt_c == "D": + src_ary = src_ary.astype(np.complex64) ti._copy_numpy_ndarray_into_usm_ndarray( - src=src_ary, dst=dst, sycl_queue=dst.sycl_queue + src=src_ary, dst=dst, sycl_queue=copy_q ) From e363969df9b9005267363caa6ab6e65782410585 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:48:07 -0500 Subject: [PATCH 4/4] Tests should not use double precision arrays where it is not essential for testing --- dpctl/tests/test_usm_ndarray_manipulation.py | 42 ++++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 0dd4ccc9d7..038ac007c8 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -152,7 +152,7 @@ def test_expand_dims_tuple(axes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.empty((3, 3, 3)) + Xnp = np.empty((3, 3, 3), dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) Y = dpt.expand_dims(X, axes) Ynp = np.expand_dims(Xnp, axes) @@ -234,7 +234,7 @@ def test_squeeze_without_axes(shapes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.empty(shapes) + Xnp = np.empty(shapes, dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) Y = dpt.squeeze(X) Ynp = Xnp.squeeze() @@ -248,7 +248,7 @@ def test_squeeze_axes_arg(axes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.array([[[1], [2], [3]]]) + Xnp = np.array([[[1], [2], [3]]], dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) Y = 
dpt.squeeze(X, axes) Ynp = Xnp.squeeze(axes) @@ -262,7 +262,7 @@ def test_squeeze_axes_arg_error(axes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.array([[[1], [2], [3]]]) + Xnp = np.array([[[1], [2], [3]]], dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) pytest.raises(ValueError, dpt.squeeze, X, axes) @@ -270,21 +270,21 @@ def test_squeeze_axes_arg_error(axes): @pytest.mark.parametrize( "data", [ - [np.array(0), (0,)], - [np.array(0), (1,)], - [np.array(0), (3,)], - [np.ones(1), (1,)], - [np.ones(1), (2,)], - [np.ones(1), (1, 2, 3)], - [np.arange(3), (3,)], - [np.arange(3), (1, 3)], - [np.arange(3), (2, 3)], - [np.ones(0), 0], - [np.ones(1), 1], - [np.ones(1), 2], - [np.ones(1), (0,)], - [np.ones((1, 2)), (0, 2)], - [np.ones((2, 1)), (2, 0)], + [np.array(0, dtype="u1"), (0,)], + [np.array(0, dtype="u1"), (1,)], + [np.array(0, dtype="u1"), (3,)], + [np.ones(1, dtype="u1"), (1,)], + [np.ones(1, dtype="u1"), (2,)], + [np.ones(1, dtype="u1"), (1, 2, 3)], + [np.arange(3, dtype="u1"), (3,)], + [np.arange(3, dtype="u1"), (1, 3)], + [np.arange(3, dtype="u1"), (2, 3)], + [np.ones(0, dtype="u1"), 0], + [np.ones(1, dtype="u1"), 1], + [np.ones(1, dtype="u1"), 2], + [np.ones(1, dtype="u1"), (0,)], + [np.ones((1, 2), dtype="u1"), (0, 2)], + [np.ones((2, 1), dtype="u1"), (2, 0)], ], ) def test_broadcast_to_succeeds(data): @@ -323,7 +323,7 @@ def test_broadcast_to_raises(data): pytest.skip("Queue could not be created") orig_shape, target_shape = data - Xnp = np.zeros(orig_shape) + Xnp = np.zeros(orig_shape, dtype="i1") X = dpt.asarray(Xnp, sycl_queue=q) pytest.raises(ValueError, dpt.broadcast_to, X, target_shape) @@ -333,7 +333,7 @@ def assert_broadcast_correct(input_shapes): q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - np_arrays = [np.zeros(s) for s in input_shapes] + np_arrays = [np.zeros(s, dtype="i1") for s in input_shapes] out_np_arrays = np.broadcast_arrays(*np_arrays) 
usm_arrays = [dpt.asarray(Xnp, sycl_queue=q) for Xnp in np_arrays] out_usm_arrays = dpt.broadcast_arrays(*usm_arrays)