From 91f35d0bf9bf4e5a40373c03e4b556ca9aa11af2 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 3 Mar 2023 16:12:11 +0100 Subject: [PATCH 1/5] Fix where operator for support passing 1 arg --- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 1 + dpnp/dpnp_iface_searching.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 0b80ac678d34..5c3fb76e23be 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -1021,6 +1021,7 @@ void func_map_init_indexing_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_NONZERO][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_nonzero_default_c}; fmap[DPNPFuncName::DPNP_FN_NONZERO][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_nonzero_default_c}; + fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_nonzero_ext_c}; fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_nonzero_ext_c}; fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_nonzero_ext_c}; fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_nonzero_ext_c}; diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index cef5d686035b..ad349793ae66 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -184,4 +184,14 @@ def where(condition, x=None, y=None): """ + missing = (x is None, y is None).count(True) + if missing == 1: + raise ValueError("Must provide both 'x' and 'y' or neither.") + if missing == 2: + condition_desc = dpnp.get_dpnp_descriptor(condition, copy_when_nondefault_queue=False) + if condition_desc: + return dpnp_nonzero(condition_desc) + + return call_origin(numpy.nonzero, condition) + return call_origin(numpy.where, condition, x, y) From 61c2db3c34c2db826b597fd00e7d0a3c0eaebddf Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 3 Mar 2023 16:13:58 +0100 Subject: [PATCH 2/5] Unskip and fix tests for where operator --- tests/skipped_tests.tbl | 7 +------ tests/skipped_tests_gpu.tbl | 9 --------- tests/third_party/cupy/sorting_tests/test_search.py | 1 + 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index d598ea2ca9fd..0879c0f6e979 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -364,7 +364,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOf tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOfShape_param_7_{copy=True, ndmin=3, xp=dpnp}::test_cupy_array tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOfShape_param_8_{copy=False, ndmin=0, xp=numpy}::test_cupy_array tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOfShape_param_9_{copy=False, ndmin=0, xp=dpnp}::test_cupy_array -tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_is_copied +tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_is_copied tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_list_of_cupy_with_dtype tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_list_of_cupy_with_dtype_char tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_list_of_numpy_with_dtype @@ -1073,11 +1073,6 @@ tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_pa tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_1_{array=array(1)}::test_nonzero tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmax_zero_size tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmin_zero_size -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_0_{cond_shape=(2, 3, 4)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_1_{cond_shape=(4,)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_2_{cond_shape=(2, 3, 4)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_3_{cond_shape=(3, 4)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereError::test_one_argument tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_axis tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis1 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis2 diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 3dedcff4af04..635f6c156cb6 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1277,15 +1277,6 @@ tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_pa tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_1_{array=array(1)}::test_nonzero tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmax_zero_size tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmin_zero_size -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_0_{cond_shape=(2, 3, 4)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_1_{cond_shape=(4,)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_2_{cond_shape=(2, 3, 4)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_3_{cond_shape=(3, 4)}::test_where_cond -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereError::test_one_argument -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_0_{cond_shape=(2, 3, 4), x_shape=(2, 3, 4), y_shape=(2, 3, 4)}::test_where_two_arrays -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_1_{cond_shape=(4,), x_shape=(2, 3, 4), y_shape=(2, 3, 4)}::test_where_two_arrays -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_2_{cond_shape=(2, 3, 4), x_shape=(2, 3, 4), y_shape=(3, 4)}::test_where_two_arrays -tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_3_{cond_shape=(3, 4), x_shape=(2, 3, 4), y_shape=(4,)}::test_where_two_arrays tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_axis tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis1 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis2 diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py index 838f559ed8cf..b0b7f94617a6 100644 --- a/tests/third_party/cupy/sorting_tests/test_search.py +++ b/tests/third_party/cupy/sorting_tests/test_search.py @@ -262,6 +262,7 @@ def test_argminmax_dtype(self, in_dtype, result_dtype): {'cond_shape': (2, 3, 4), 'x_shape': (2, 3, 4), 'y_shape': (3, 4)}, {'cond_shape': (3, 4), 'x_shape': (2, 3, 4), 'y_shape': (4,)}, ) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestWhereTwoArrays(unittest.TestCase): From e2022e4278c4e21ccb87dcfa983f79645c386e20 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Sun, 5 Mar 2023 10:56:13 -0600 Subject: [PATCH 3/5] Add support of dpnp.where() with x and y arguments --- dpnp/backend/include/dpnp_iface.hpp | 52 ++++ dpnp/backend/include/dpnp_iface_fptr.hpp | 1 + dpnp/backend/kernels/dpnp_krnl_searching.cpp | 257 +++++++++++++++++- dpnp/dpnp_algo/dpnp_algo.pxd | 2 + dpnp/dpnp_algo/dpnp_algo_searching.pyx | 106 +++++++- dpnp/dpnp_iface_searching.py | 58 +++- .../cupy/sorting_tests/test_search.py | 4 +- 7 files changed, 467 insertions(+), 13 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 7a80b40a3d2e..a124b9e07a11 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -57,6 +57,7 @@ typedef ssize_t shape_elem_type; #include +#include "dpnp_iface_fptr.hpp" #include "dpnp_iface_fft.hpp" #include "dpnp_iface_random.hpp" @@ -1683,6 +1684,57 @@ INP_DLLEXPORT void dpnp_var_c(void* array, size_t naxis, size_t ddof); +/** + * @ingroup BACKEND_API + * @brief Implementation of where function + * + * @param [in] q_ref Reference to SYCL queue. + * @param [out] result_out Output array. + * @param [in] result_size Size of output array. + * @param [in] result_ndim Number of output array dimensions. + * @param [in] result_shape Shape of output array. + * @param [in] result_strides Strides of output array. + * @param [in] condition_in Condition array. + * @param [in] condition_size Size of condition array. + * @param [in] condition_ndim Number of condition array dimensions. + * @param [in] condition_shape Shape of condition array. + * @param [in] condition_strides Strides of condition array. + * @param [in] input1_in First input array. + * @param [in] input1_size Size of first input array. + * @param [in] input1_ndim Number of first input array dimensions. + * @param [in] input1_shape Shape of first input array. + * @param [in] input1_strides Strides of first input array. + * @param [in] input2_in Second input array. + * @param [in] input2_size Size of second input array. + * @param [in] input2_ndim Number of second input array dimensions. + * @param [in] input2_shape Shape of second input array. + * @param [in] input2_strides Strides of second input array. + * @param [in] dep_event_vec_ref Reference to vector of SYCL events. + */ +template +INP_DLLEXPORT DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref, + void* result_out, + const size_t result_size, + const size_t result_ndim, + const shape_elem_type* result_shape, + const shape_elem_type* result_strides, + const void* condition_in, + const size_t condition_size, + const size_t condition_ndim, + const shape_elem_type* condition_shape, + const shape_elem_type* condition_strides, + const void* input1_in, + const size_t input1_size, + const size_t input1_ndim, + const shape_elem_type* input1_shape, + const shape_elem_type* input1_strides, + const void* input2_in, + const size_t input2_size, + const size_t input2_ndim, + const shape_elem_type* input2_shape, + const shape_elem_type* input2_strides, + const DPCTLEventVectorRef dep_event_vec_ref); + /** * @ingroup BACKEND_API * @brief Implementation of invert function diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index fb154fcabfac..517338b05dea 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -377,6 +377,7 @@ enum class DPNPFuncName : size_t DPNP_FN_VANDER_EXT, /**< Used in numpy.vander() impl, requires extra parameters */ DPNP_FN_VAR, /**< Used in numpy.var() impl */ DPNP_FN_VAR_EXT, /**< Used in numpy.var() impl, requires extra parameters */ + DPNP_FN_WHERE_EXT, /**< Used in numpy.var() impl, requires extra parameters */ DPNP_FN_ZEROS, /**< Used in numpy.zeros() impl */ DPNP_FN_ZEROS_LIKE, /**< Used in numpy.zeros_like() impl */ DPNP_FN_LAST, /**< The latest element of the enumeration */ diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp index 39156ea07c44..fef5f78d15da 100644 --- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ #include #include "dpnp_fptr.hpp" +#include "dpnp_iterator.hpp" #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" @@ -139,6 +140,258 @@ DPCTLSyclEventRef (*dpnp_argmin_ext_c)(DPCTLSyclQueueRef, size_t, const DPCTLEventVectorRef) = dpnp_argmin_c<_DataType, _idx_DataType>; + +template +class dpnp_where_c_broadcast_kernel; + +template +class dpnp_where_c_strides_kernel; + +template +class dpnp_where_c_kernel; + +template +DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref, + void* result_out, + const size_t result_size, + const size_t result_ndim, + const shape_elem_type* result_shape, + const shape_elem_type* result_strides, + const void* condition_in, + const size_t condition_size, + const size_t condition_ndim, + const shape_elem_type* condition_shape, + const shape_elem_type* condition_strides, + const void* input1_in, + const size_t input1_size, + const size_t input1_ndim, + const shape_elem_type* input1_shape, + const shape_elem_type* input1_strides, + const void* input2_in, + const size_t input2_size, + const size_t input2_ndim, + const shape_elem_type* input2_shape, + const shape_elem_type* input2_strides, + const DPCTLEventVectorRef dep_event_vec_ref) +{ + /* avoid warning unused variable*/ + (void)dep_event_vec_ref; + + DPCTLSyclEventRef event_ref = nullptr; + + if (!condition_size || !input1_size || !input2_size) + { + return event_ref; + } + + sycl::queue q = *(reinterpret_cast(q_ref)); + + bool* condition_data = static_cast(const_cast(condition_in)); + _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast(input1_in)); + _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast(input2_in)); + _DataType_output* result = static_cast<_DataType_output*>(result_out); + + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); + use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input1_shape, input1_ndim); + use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input2_shape, input2_ndim); + + shape_elem_type* condition_shape_offsets = new shape_elem_type[condition_ndim]; + + get_shape_offsets_inkernel(condition_shape, condition_ndim, condition_shape_offsets); + bool use_strides = !array_equal(condition_strides, condition_ndim, condition_shape_offsets, condition_ndim); + delete[] condition_shape_offsets; + + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; + + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); + use_strides = use_strides || !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); + delete[] input1_shape_offsets; + + shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; + + get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); + use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); + delete[] input2_shape_offsets; + + sycl::event event; + sycl::range<1> gws(result_size); + + if (use_broadcasting) + { + DPNPC_id* condition_it; + const size_t condition_it_it_size_in_bytes = sizeof(DPNPC_id); + condition_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, condition_it_it_size_in_bytes)); + new (condition_it) DPNPC_id(q_ref, condition_data, condition_shape, condition_strides, condition_ndim); + + condition_it->broadcast_to_shape(result_shape, result_ndim); + + DPNPC_id<_DataType_input1>* input1_it; + const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); + input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); + new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); + + input1_it->broadcast_to_shape(result_shape, result_ndim); + + DPNPC_id<_DataType_input2>* input2_it; + const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); + input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); + new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); + + input2_it->broadcast_to_shape(result_shape, result_ndim); + + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ + { + const bool condition = (*condition_it)[i]; + const _DataType_output input1_elem = (*input1_it)[i]; + const _DataType_output input2_elem = (*input2_it)[i]; + result[i] = (condition) ? input1_elem : input2_elem; + } + }; + auto kernel_func = [&](sycl::handler& cgh) { + cgh.parallel_for>( + gws, kernel_parallel_for_func); + }; + + q.submit(kernel_func).wait(); + + condition_it->~DPNPC_id(); + input1_it->~DPNPC_id(); + input2_it->~DPNPC_id(); + + return event_ref; + } + else if (use_strides) + { + if ((result_ndim != condition_ndim) || (result_ndim != input1_ndim) || (result_ndim != input2_ndim)) + { + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + + " mismatches with either condition ndim=" + std::to_string(condition_ndim) + + " or input1 ndim=" + std::to_string(input1_ndim) + + " or input2 ndim=" + std::to_string(input2_ndim)); + } + + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ + using usm_host_allocatorT = sycl::usm_allocator; + + size_t strides_size = 4 * result_ndim; + shape_elem_type* dev_strides_data = sycl::malloc_device(strides_size, q); + + /* create host temporary for packed strides managed by shared pointer */ + auto strides_host_packed = + std::vector(strides_size, usm_host_allocatorT(q)); + + /* packed vector is concatenation of result_strides, condition_strides, input1_strides and input2_strides */ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); + std::copy(condition_strides, condition_strides + result_ndim, strides_host_packed.begin() + result_ndim); + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); + std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 3 * result_ndim); + + auto copy_strides_ev = + q.copy(strides_host_packed.data(), dev_strides_data, strides_host_packed.size()); + + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ + { + const shape_elem_type* result_strides_data = &dev_strides_data[0]; + const shape_elem_type* condition_strides_data = &dev_strides_data[1]; + const shape_elem_type* input1_strides_data = &dev_strides_data[2]; + const shape_elem_type* input2_strides_data = &dev_strides_data[3]; + + size_t condition_id = 0; + size_t input1_id = 0; + size_t input2_id = 0; + + for (size_t i = 0; i < result_ndim; ++i) + { + const size_t output_xyz_id = + get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); + condition_id += output_xyz_id * condition_strides_data[i]; + input1_id += output_xyz_id * input1_strides_data[i]; + input2_id += output_xyz_id * input2_strides_data[i]; + } + + const bool condition = condition_data[condition_id]; + const _DataType_output input1_elem = input1_data[input1_id]; + const _DataType_output input2_elem = input2_data[input2_id]; + result[output_id] = (condition) ? input1_elem : input2_elem; + } + }; + auto kernel_func = [&](sycl::handler& cgh) { + cgh.depends_on(copy_strides_ev); + cgh.parallel_for>( + gws, kernel_parallel_for_func); + }; + + q.submit(kernel_func).wait(); + + sycl::free(dev_strides_data, q); + return event_ref; + } + else + { + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ + + const bool condition = condition_data[i]; + const _DataType_output input1_elem = input1_data[i]; + const _DataType_output input2_elem = input2_data[i]; + result[i] = (condition) ? input1_elem : input2_elem; + }; + auto kernel_func = [&](sycl::handler& cgh) { + cgh.parallel_for>( + gws, kernel_parallel_for_func); + }; + event = q.submit(kernel_func); + } + + event_ref = reinterpret_cast(&event); + return DPCTLEvent_Copy(event_ref); + + return event_ref; +} + +template +DPCTLSyclEventRef (*dpnp_where_ext_c)(DPCTLSyclQueueRef, + void*, + const size_t, + const size_t, + const shape_elem_type*, + const shape_elem_type*, + const void*, + const size_t, + const size_t, + const shape_elem_type*, + const shape_elem_type*, + const void*, + const size_t, + const size_t, + const shape_elem_type*, + const shape_elem_type*, + const void*, + const size_t, + const size_t, + const shape_elem_type*, + const shape_elem_type*, + const DPCTLEventVectorRef) = dpnp_where_c<_DataType_output, _DataType_input1, _DataType_input2>; + +template +static void func_map_searching_2arg_3type_core(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_WHERE_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_where_ext_c()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + +template +static void func_map_searching_2arg_3type_helper(func_map_t& fmap) +{ + ((func_map_searching_2arg_3type_core(fmap)), ...); +} + void func_map_init_searching(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ARGMAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_argmax_default_c}; @@ -177,5 +430,7 @@ void func_map_init_searching(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_INT] = {eft_INT, (void*)dpnp_argmin_ext_c}; fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_LNG] = {eft_LNG, (void*)dpnp_argmin_ext_c}; + func_map_searching_2arg_3type_helper(fmap); + return; } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 9bf161b0aaf7..9edf6255ef52 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -356,6 +356,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_VANDER_EXT DPNP_FN_VAR DPNP_FN_VAR_EXT + DPNP_FN_WHERE_EXT DPNP_FN_ZEROS DPNP_FN_ZEROS_LIKE @@ -578,6 +579,7 @@ Searching functions """ cpdef dpnp_descriptor dpnp_argmax(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_argmin(dpnp_descriptor array1) +cpdef dpnp_descriptor dpnp_where(dpnp_descriptor cond_obj, dpnp_descriptor x_obj, dpnp_descriptor y_obj) """ Trigonometric functions diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pyx b/dpnp/dpnp_algo/dpnp_algo_searching.pyx index 59ce8475181a..44621b5cca04 100644 --- a/dpnp/dpnp_algo/dpnp_algo_searching.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_searching.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,7 +36,8 @@ and the rest of the library __all__ += [ "dpnp_argmax", - "dpnp_argmin" + "dpnp_argmin", + "dpnp_where" ] @@ -45,6 +46,29 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_search_1in_1out_func_ptr_t)(c_dpctl.D void * , void * , size_t, const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*where_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + const size_t, + const shape_elem_type * , + const shape_elem_type * , + void *, + const size_t, + const size_t, + const shape_elem_type * , + const shape_elem_type * , + void *, + const size_t, + const size_t, + const shape_elem_type * , + const shape_elem_type * , + void *, + const size_t, + const size_t, + const shape_elem_type * , + const shape_elem_type * , + const c_dpctl.DPCTLEventVectorRef) except + + cpdef utils.dpnp_descriptor dpnp_argmax(utils.dpnp_descriptor in_array1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) @@ -116,3 +140,81 @@ cpdef utils.dpnp_descriptor dpnp_argmin(utils.dpnp_descriptor in_array1): c_dpctl.DPCTLEvent_Delete(event_ref) return result + + +cpdef utils.dpnp_descriptor dpnp_where(utils.dpnp_descriptor cond_obj, + utils.dpnp_descriptor x_obj, + utils.dpnp_descriptor y_obj): + # Convert object type to C enum DPNPFuncType + cdef DPNPFuncType cond_c_type = dpnp_dtype_to_DPNPFuncType(cond_obj.dtype) + cdef DPNPFuncType x_c_type = dpnp_dtype_to_DPNPFuncType(x_obj.dtype) + cdef DPNPFuncType y_c_type = dpnp_dtype_to_DPNPFuncType(y_obj.dtype) + + # get the FPTR data structure + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_WHERE_EXT, x_c_type, y_c_type) + + # Create result array + cdef shape_type_c cond_shape = cond_obj.shape + cdef shape_type_c x_shape = x_obj.shape + cdef shape_type_c y_shape = y_obj.shape + + cdef shape_type_c cond_strides = utils.strides_to_vector(cond_obj.strides, cond_shape) + cdef shape_type_c x_strides = utils.strides_to_vector(x_obj.strides, x_shape) + cdef shape_type_c y_strides = utils.strides_to_vector(y_obj.strides, y_shape) + + cdef shape_type_c cond_x_shape = utils.get_common_shape(cond_shape, x_shape) + cdef shape_type_c cond_y_shape = utils.get_common_shape(cond_shape, y_shape) + cdef shape_type_c result_shape = utils.get_common_shape(cond_x_shape, cond_y_shape) + cdef utils.dpnp_descriptor result + + result_usm_type, result_sycl_queue = utils_py.get_usm_allocations([cond_obj.get_array(), + x_obj.get_array(), + y_obj.get_array()]) + + # get FPTR function and return type + cdef where_func_ptr_t func = < where_func_ptr_t > kernel_data.ptr + cdef DPNPFuncType return_type = kernel_data.return_type + + """ Create result array with type given by FPTR data """ + result = utils.create_output_descriptor(result_shape, + return_type, + None, + device=None, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) + + cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) + + result_obj = result.get_array() + + cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + + """ Call FPTR function """ + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + result_shape.data(), + result_strides.data(), + cond_obj.get_data(), + cond_obj.size, + cond_obj.ndim, + cond_shape.data(), + cond_strides.data(), + x_obj.get_data(), + x_obj.size, + x_obj.ndim, + x_shape.data(), + x_strides.data(), + y_obj.get_data(), + y_obj.size, + y_obj.ndim, + y_shape.data(), + y_strides.data(), + NULL) # dep_events_ref) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) + + return result diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index ad349793ae66..534f8fd97a5d 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -176,22 +176,62 @@ def searchsorted(a, v, side='left', sorter=None): return call_origin(numpy.where, a, v, side, sorter) -def where(condition, x=None, y=None): +def where(condition, x=None, y=None, /): """ - Find indices where elements should be inserted to maintain order. + Return elements chosen from `x` or `y` depending on `condition`. - For full documentation refer to :obj:`numpy.searchsorted`. + When only `condition` is provided, this function is a shorthand for + :obj:`dpnp.nonzero(condition)`. + + For full documentation refer to :obj:`numpy.where`. + + Returns + ------- + y : dpnp.ndarray + An array with elements from `x` where `condition` is True, and elements + from `y` elsewhere. + + Limitations + ----------- + Parameters `condition`, `x` and `y` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + Otherwise the function will be executed sequentially on CPU. + Data type of `condition` parameter is limited by :obj:`dpnp.bool`. + Input array data types of `x` and `y` are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`nonzero` : The function that is called when `x` and `y`are omitted. + + Examples + -------- + >>> import dpnp as dp + >>> a = dp.arange(10) + >>> d + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> dp.where(a < 5, a, 10*a) + array([ 0, 1, 2, 3, 4, 50, 60, 70, 80, 90]) """ missing = (x is None, y is None).count(True) if missing == 1: raise ValueError("Must provide both 'x' and 'y' or neither.") - if missing == 2: - condition_desc = dpnp.get_dpnp_descriptor(condition, copy_when_nondefault_queue=False) - if condition_desc: - return dpnp_nonzero(condition_desc) - - return call_origin(numpy.nonzero, condition) + elif missing == 2: + return dpnp.nonzero(condition) + elif missing == 0: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([condition, x, y]) + + c_desc = dpnp.get_dpnp_descriptor(condition, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + y_desc = dpnp.get_dpnp_descriptor(y, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if c_desc and x_desc and y_desc: + if c_desc.dtype != dpnp.bool: + raise TypeError("condition must be a boolean array") + return dpnp_where(c_desc, x_desc, y_desc).get_pyobj() return call_origin(numpy.where, condition, x, y) diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py index b0b7f94617a6..17751aed75c0 100644 --- a/tests/third_party/cupy/sorting_tests/test_search.py +++ b/tests/third_party/cupy/sorting_tests/test_search.py @@ -268,12 +268,14 @@ class TestWhereTwoArrays(unittest.TestCase): @testing.for_all_dtypes_combination( names=['cond_type', 'x_type', 'y_type']) - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=False) def test_where_two_arrays(self, xp, cond_type, x_type, y_type): m = testing.shaped_random(self.cond_shape, xp, xp.bool_) # Almost all values of a matrix `shaped_random` makes are not zero. # To make a sparse matrix, we need multiply `m`. cond = testing.shaped_random(self.cond_shape, xp, cond_type) * m + if xp is cupy: + cond = cond.astype(cupy.bool) x = testing.shaped_random(self.x_shape, xp, x_type, seed=0) y = testing.shaped_random(self.y_shape, xp, y_type, seed=1) return xp.where(cond, x, y) From 16cce9b34fed5f8dcb778139a3fdf161e5905d29 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sun, 5 Mar 2023 20:06:09 +0100 Subject: [PATCH 4/5] Update dpnp/backend/include/dpnp_iface_fptr.hpp --- dpnp/backend/include/dpnp_iface_fptr.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 517338b05dea..3a393708255a 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -377,7 +377,7 @@ enum class DPNPFuncName : size_t DPNP_FN_VANDER_EXT, /**< Used in numpy.vander() impl, requires extra parameters */ DPNP_FN_VAR, /**< Used in numpy.var() impl */ DPNP_FN_VAR_EXT, /**< Used in numpy.var() impl, requires extra parameters */ - DPNP_FN_WHERE_EXT, /**< Used in numpy.var() impl, requires extra parameters */ + DPNP_FN_WHERE_EXT, /**< Used in numpy.where() impl, requires extra parameters */ DPNP_FN_ZEROS, /**< Used in numpy.zeros() impl */ DPNP_FN_ZEROS_LIKE, /**< Used in numpy.zeros_like() impl */ DPNP_FN_LAST, /**< The latest element of the enumeration */ From 3926f899efc8c8b6ac5f9cd9fffa056cd402c25f Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Sun, 5 Mar 2023 16:25:30 -0600 Subject: [PATCH 5/5] Use dpctl.tensor.nonzero() implementation --- dpnp/dpnp_iface_searching.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index 534f8fd97a5d..91d93991fe2d 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -44,7 +44,9 @@ from dpnp.dpnp_utils import * import dpnp + import numpy +import dpctl.tensor as dpt __all__ = [ @@ -218,7 +220,9 @@ def where(condition, x=None, y=None, /): if missing == 1: raise ValueError("Must provide both 'x' and 'y' or neither.") elif missing == 2: - return dpnp.nonzero(condition) + # TODO: rework through dpnp.nonzero() once ready + return dpt.nonzero(dpt.asarray(condition)) + # return dpnp.nonzero(condition) elif missing == 0: # get USM type and queue to copy scalar from the host memory into a USM allocation usm_type, queue = get_usm_allocations([condition, x, y])