Skip to content
94 changes: 62 additions & 32 deletions dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,50 +346,80 @@ void dpnp_remainder_c(void* result_out,
const size_t input2_shape_ndim,
const size_t* where)
{
(void)input1_shape;
(void)input1_shape_ndim;
(void)input2_size;
(void)input2_shape;
(void)input2_shape_ndim;
(void)where;

cl::sycl::event event;
_DataType_input1* input1 = reinterpret_cast<_DataType_input1*>(const_cast<void*>(input1_in));
_DataType_input2* input2 = reinterpret_cast<_DataType_input2*>(const_cast<void*>(input2_in));
if (!input1_size || !input2_size)
{
return;
}

_DataType_input1* input1_data = reinterpret_cast<_DataType_input1*>(const_cast<void*>(input1_in));
_DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast<void*>(input2_in));
_DataType_output* result = reinterpret_cast<_DataType_output*>(result_out);

if constexpr ((std::is_same<_DataType_input1, double>::value || std::is_same<_DataType_input1, float>::value) &&
std::is_same<_DataType_input2, _DataType_input1>::value)
std::vector<size_t> result_shape = get_result_shape(input1_shape, input1_shape_ndim,
input2_shape, input2_shape_ndim);

DPNPC_id<_DataType_input1>* input1_it;
const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(input1_it_size_in_bytes));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can call the memory allocator just once here.

new (input1_it) DPNPC_id<_DataType_input1>(input1_data, input1_shape, input1_shape_ndim);

input1_it->broadcast_to_shape(result_shape);

DPNPC_id<_DataType_input2>* input2_it;
const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(input2_it_size_in_bytes));
new (input2_it) DPNPC_id<_DataType_input2>(input2_data, input2_shape, input2_shape_ndim);

input2_it->broadcast_to_shape(result_shape);

const size_t result_size = input1_it->get_output_size();

cl::sycl::range<1> gws(result_size);
auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
const size_t i = global_id[0];
const _DataType_output input1_elem = (*input1_it)[i];
const _DataType_output input2_elem = (*input2_it)[i];
double fmod_res = cl::sycl::fmod((double)input1_elem, (double)input2_elem);
double add = fmod_res + input2_elem;
result[i] = cl::sycl::fmod(add, (double)input2_elem);

};
auto kernel_func = [&](cl::sycl::handler& cgh) {
cgh.parallel_for<class dpnp_remainder_c_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
gws, kernel_parallel_for_func);
};

cl::sycl::event event;

if (input1_size == input2_size)
{
event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, input1, input2, result);
event.wait();
event = oneapi::mkl::vm::add(DPNP_QUEUE, input1_size, result, input2, result);
event.wait();
event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, result, input2, result);
if constexpr ((std::is_same<_DataType_input1, double>::value ||
std::is_same<_DataType_input1, float>::value) &&
std::is_same<_DataType_input2, _DataType_input1>::value)
{
event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, input1_data, input2_data, result);
event.wait();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dependent events here; do not wait on each call separately.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dependent events here; do not wait on each call separately.

event = oneapi::mkl::vm::add(DPNP_QUEUE, input1_size, result, input2_data, result);
event.wait();
event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, result, input2_data, result);
}
else
{
event = DPNP_QUEUE.submit(kernel_func);
}
}
else
{
cl::sycl::range<1> gws(input1_size);
auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/
{
_DataType_input1 input_elem1 = input1[i];
_DataType_input2 input_elem2 = input2[i];
double fmod = cl::sycl::fmod((double)input_elem1, (double)input_elem2);
double add = fmod + input_elem2;
result[i] = cl::sycl::fmod(add, (double)input_elem2);
}
};

auto kernel_func = [&](cl::sycl::handler& cgh) {
cgh.parallel_for<class dpnp_remainder_c_kernel<_DataType_input1, _DataType_input2, _DataType_output>>(
gws, kernel_parallel_for_func);
};

event = DPNP_QUEUE.submit(kernel_func);
}

event.wait();

input1_it->~DPNPC_id();
input2_it->~DPNPC_id();
Comment on lines +420 to +421

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that we need to redesign this class if its interface requires an explicit call to the destructor.


}

template <typename _KernelNameSpecialization1, typename _KernelNameSpecialization2, typename _KernelNameSpecialization3>
Expand Down
37 changes: 31 additions & 6 deletions dpnp/dpnp_iface_mathematical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1385,15 +1385,16 @@ def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, wher
return call_origin(numpy.prod, x1, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where)


def remainder(x1, x2, **kwargs):
def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
"""
Return element-wise remainder of division.

For full documentation refer to :obj:`numpy.remainder`.

Limitations
-----------
Parameters ``x1`` and ``x2`` are supported as :obj:`dpnp.ndarray`.
Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
Keyword arguments ``kwargs`` are currently unsupported.
Otherwise the functions will be executed sequentially on CPU.
Input array data types are limited by supported DPNP :ref:`Data types`.
Expand All @@ -1414,18 +1415,42 @@ def remainder(x1, x2, **kwargs):

"""

x1_is_scalar = dpnp.isscalar(x1)
x2_is_scalar = dpnp.isscalar(x2)
x1_desc = dpnp.get_dpnp_descriptor(x1)
x2_desc = dpnp.get_dpnp_descriptor(x2)

if x1_desc and x2_desc and not kwargs:
if x1_desc.size != x2_desc.size:
if not x1_desc and not x1_is_scalar:
pass
elif not x2_desc and not x2_is_scalar:
pass
elif x1_desc.shape != x2_desc.shape:
elif x1_is_scalar and x2_is_scalar:
pass
elif x1_desc and x1_desc.ndim == 0:
pass
elif x2_desc and x2_desc.ndim == 0:
pass
elif x2_is_scalar and not x2_desc:
pass
elif x1_desc and x2_desc and x1_desc.size != x2_desc.size:
pass
elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape:
pass
elif out is not None and not isinstance(out, dparray):
pass
elif dtype is not None:
pass
elif out is not None:
pass
elif not where:
pass
elif x1_is_scalar and x2_desc.ndim > 1:
pass
else:
return dpnp_remainder(x1_desc, x2_desc).get_pyobj()
return dpnp_remainder(x1_desc, x2_desc, out=out, where=where, dtype=dtype)

return call_origin(numpy.remainder, x1, x2, **kwargs)
return call_origin(numpy.remainder, x1, x2, out=out, where=where, dtype=dtype, **kwargs)


def round_(a, decimals=0, out=None):
Expand Down
3 changes: 3 additions & 0 deletions tests/test_mathematical.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ def test_minimum(self, dtype, lhs, rhs):
# Forwards the received dtype/lhs/rhs arguments to the shared _test_mathematical
# helper, selecting the operation by the name 'multiply'.
# NOTE(review): the helper's checking logic is outside this view — confirm there.
def test_multiply(self, dtype, lhs, rhs):
self._test_mathematical('multiply', dtype, lhs, rhs)

# Forwards the received dtype/lhs/rhs arguments to the shared _test_mathematical
# helper, selecting the operation by the name 'remainder'.
# NOTE(review): the helper's checking logic is outside this view — confirm there.
def test_remainder(self, dtype, lhs, rhs):
self._test_mathematical('remainder', dtype, lhs, rhs)

# Forwards the received dtype/lhs/rhs arguments to the shared _test_mathematical
# helper, selecting the operation by the name 'power'.
# NOTE(review): the helper's checking logic is outside this view — confirm there.
def test_power(self, dtype, lhs, rhs):
self._test_mathematical('power', dtype, lhs, rhs)

Expand Down