From ab63fff0cc3e9af4ea6da812a0a63677c32c1f31 Mon Sep 17 00:00:00 2001
From: KsanaKozlova <oksana.kozlova@intel.com>
Date: Tue, 11 May 2021 21:36:10 +0300
Subject: [PATCH 1/3] add broadcasting for remainder

---
 .../kernels/dpnp_krnl_mathematical.cpp        | 95 ++++++++++++-------
 dpnp/dpnp_iface_mathematical.py               | 47 ++++++---
 2 files changed, 97 insertions(+), 45 deletions(-)
diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
index bedf614429c9..4bdb6a6e79f5 100644
--- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -29,6 +29,7 @@
 
 #include <dpnp_iface.hpp>
 #include "dpnp_fptr.hpp"
+#include "dpnp_iterator.hpp"
 #include "dpnp_utils.hpp"
 #include "queue_sycl.hpp"
 
@@ -276,50 +277,80 @@ void dpnp_remainder_c(void* result_out,
                       const size_t input2_shape_ndim,
                       const size_t* where)
 {
-    (void)input1_shape;
-    (void)input1_shape_ndim;
-    (void)input2_size;
-    (void)input2_shape;
-    (void)input2_shape_ndim;
     (void)where;
 
-    cl::sycl::event event;
-    _DataType_input1* input1 = reinterpret_cast<_DataType_input1*>(const_cast<void*>(input1_in));
-    _DataType_input2* input2 = reinterpret_cast<_DataType_input2*>(const_cast<void*>(input2_in));
+    if (!input1_size || !input2_size)
+    {
+        return;
+    }
+
+    _DataType_input1* input1_data = reinterpret_cast<_DataType_input1*>(const_cast<void*>(input1_in));
+    _DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast<void*>(input2_in));
     _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out);
 
-    if constexpr ((std::is_same<_DataType_input1, double>::value || std::is_same<_DataType_input1, float>::value) &&
-                  std::is_same<_DataType_input2, _DataType_input1>::value)
+    std::vector<size_t> result_shape = get_result_shape(input1_shape, input1_shape_ndim,
+                                                            input2_shape, input2_shape_ndim); 
+
+    DPNPC_id<_DataType_input1>* input1_it;
+    const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);
+    input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(input1_it_size_in_bytes));
+    new (input1_it) DPNPC_id<_DataType_input1>(input1_data, input1_shape, input1_shape_ndim);
+
+    input1_it->broadcast_to_shape(result_shape);
+
+    DPNPC_id<_DataType_input2>* input2_it;
+    const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);
+    input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(input2_it_size_in_bytes));
+    new (input2_it) DPNPC_id<_DataType_input2>(input2_data, input2_shape, input2_shape_ndim);
+
+    input2_it->broadcast_to_shape(result_shape);
+    
+    const size_t result_size = input1_it->get_output_size();
+
+    cl::sycl::range<1> gws(result_size); 
+    auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
+        const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
+        const _DataType_output input1_elem = (*input1_it)[i];
+        const _DataType_output input2_elem = (*input2_it)[i];
+        double fmod = cl::sycl::fmod((double)input1_elem, (double)input2_elem);
+        double add = fmod + input2_elem;
+        result[i] = cl::sycl::fmod(add, (double)input2_elem);
+
+    };
+    auto kernel_func = [&](cl::sycl::handler& cgh) {
+        cgh.parallel_for<class dpnp_remainder_c_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
+            gws, kernel_parallel_for_func);
+    };
+
+    cl::sycl::event event;
+
+    if (input1_size == result_size && input2_size == result_size) // flat MKL path is valid only when neither input is broadcast
     {
-        event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, input1, input2, result);
-        event.wait();
-        event = oneapi::mkl::vm::add(DPNP_QUEUE, input1_size, result, input2, result);
-        event.wait();
-        event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, result, input2, result);
+        if constexpr ((std::is_same<_DataType_input1, double>::value ||
+                        std::is_same<_DataType_input1, float>::value) &&
+                        std::is_same<_DataType_input2, _DataType_input1>::value)
+        {
+            event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, input1_data, input2_data, result);
+            event.wait();
+            event = oneapi::mkl::vm::add(DPNP_QUEUE, input1_size, result, input2_data, result);
+            event.wait();
+            event = oneapi::mkl::vm::fmod(DPNP_QUEUE, input1_size, result, input2_data, result);
+        }
+        else
+        {
+            event = DPNP_QUEUE.submit(kernel_func);
+        }
     }
     else
     {
-        cl::sycl::range<1> gws(input1_size);
-        auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
-            size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/
-            {
-                _DataType_input1 input_elem1 = input1[i];
-                _DataType_input2 input_elem2 = input2[i];
-                double fmod = cl::sycl::fmod((double)input_elem1, (double)input_elem2);
-                double add = fmod + input_elem2;
-                result[i] = cl::sycl::fmod(add, (double)input_elem2);
-            }
-        };
-
-        auto kernel_func = [&](cl::sycl::handler& cgh) {
-            cgh.parallel_for<class dpnp_remainder_c_kernel<_DataType_input1, _DataType_input2, _DataType_output>>(
-                gws, kernel_parallel_for_func);
-        };
-
         event = DPNP_QUEUE.submit(kernel_func);
     }
 
     event.wait();
+    input1_it->~DPNPC_id(); // placement-new'ed above, so the destructor is invoked explicitly
+    dpnp_memory_free_c(input1_it); // release the dpnp_memory_alloc_c storage that held the iterator
+    input2_it->~DPNPC_id();
+    dpnp_memory_free_c(input2_it);
 }
 
 template <typename _KernelNameSpecialization1, typename _KernelNameSpecialization2, typename _KernelNameSpecialization3>
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index e89f800017d2..d6429dcd5f54 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -1325,7 +1325,7 @@ def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, wher
     return call_origin(numpy.prod, x1, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where)
 
 
-def remainder(x1, x2, **kwargs):
+def remainder(x1, x2, out=None, where=True, **kwargs):
     """
     Return element-wise remainder of division.
 
@@ -1333,7 +1333,8 @@ def remainder(x1, x2, **kwargs):
 
     Limitations
     -----------
-        Parameters ``x1`` and ``x2`` are supported as :obj:`dpnp.ndarray`.
+        Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
+        Parameters ``out`` and ``where`` are supported with their default values.
         Keyword arguments ``kwargs`` are currently unsupported.
         Otherwise the functions will be executed sequentially on CPU.
         Input array data types are limited by supported DPNP :ref:`Data types`.
@@ -1354,19 +1355,39 @@ def remainder(x1, x2, **kwargs):
 
     """
 
-    is_x1_dparray = isinstance(x1, dparray)
-    is_x2_dparray = isinstance(x2, dparray)
-
-    if (not use_origin_backend(x1) and is_x1_dparray and is_x2_dparray and not kwargs):
-        if (x1.size != x2.size):
-            checker_throw_value_error("remainder", "size", x1.size, x2.size)
-
-        if (x1.shape != x2.shape):
-            checker_throw_value_error("remainder", "shape", x1.shape, x2.shape)
+    x1_is_scalar, x2_is_scalar = dpnp.isscalar(x1), dpnp.isscalar(x2)
+    x1_is_dparray, x2_is_dparray = isinstance(x1, dparray), isinstance(x2, dparray)
+    
+    if not use_origin_backend(x1) and not kwargs:
+        if not x1_is_dparray and not x1_is_scalar:
+            pass
+        elif not x2_is_dparray and not x2_is_scalar:
+            pass
+        elif x1_is_scalar and x2_is_scalar:
+            pass
+        elif x1_is_dparray and x1.ndim == 0:
+            pass
+        elif x2_is_dparray and x2.ndim == 0:
+            pass
+        elif x2_is_scalar and x2 == 0:
+            pass
+        elif x1_is_dparray and x2_is_dparray and x1.size != x2.size:
+            pass
+        elif x1_is_dparray and x2_is_dparray and x1.shape != x2.shape:
+            pass
+        elif out is not None and not isinstance(out, dparray):
+            pass
+        elif out is not None:
+            pass
+        elif not where:
+            pass
+        elif x1_is_scalar and x2.ndim > 1:
+            pass
+        else:
+            return dpnp_remainder(x1, x2, out=out, where=where)
 
-        return dpnp_remainder(x1, x2)
+    return call_origin(numpy.remainder, x1, x2, out=out, where=where, **kwargs)
 
-    return call_origin(numpy.remainder, x1, x2, **kwargs)
 
 
 def round_(a, decimals=0, out=None):

From 08b4d3d27974c34e4d5eb8a5f32bd7b33a2e7e67 Mon Sep 17 00:00:00 2001
From: KsanaKozlova <oksana.kozlova@intel.com>
Date: Thu, 13 May 2021 21:51:48 +0300
Subject: [PATCH 2/3] add remainder test and dtype argument

---
 dpnp/dpnp_iface_mathematical.py | 10 ++++++----
 tests/test_mathematical.py      |  3 +++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index d6429dcd5f54..aa5cf644baa2 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -1325,7 +1325,7 @@ def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, wher
     return call_origin(numpy.prod, x1, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where)
 
 
-def remainder(x1, x2, out=None, where=True, **kwargs):
+def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
     """
     Return element-wise remainder of division.
 
@@ -1334,7 +1334,7 @@ def remainder(x1, x2, out=None, where=True, **kwargs):
     Limitations
     -----------
         Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-        Parameters ``out`` and ``where`` are supported with their default values.
+        Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
         Keyword arguments ``kwargs`` are currently unsupported.
         Otherwise the functions will be executed sequentially on CPU.
         Input array data types are limited by supported DPNP :ref:`Data types`.
@@ -1377,6 +1377,8 @@ def remainder(x1, x2, out=None, where=True, **kwargs):
             pass
         elif out is not None and not isinstance(out, dparray):
             pass
+        elif dtype is not None:
+            pass
         elif out is not None:
             pass
         elif not where:
@@ -1384,9 +1386,9 @@ def remainder(x1, x2, out=None, where=True, **kwargs):
         elif x1_is_scalar and x2.ndim > 1:
             pass
         else:
-            return dpnp_remainder(x1, x2, out=out, where=where)
+            return dpnp_remainder(x1, x2, out=out, where=where, dtype=dtype)
 
-    return call_origin(numpy.remainder, x1, x2, out=out, where=where, **kwargs)
+    return call_origin(numpy.remainder, x1, x2, out=out, where=where, dtype=dtype **kwargs)
 
 
 
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 24ad886e8da2..1ccb03a89d9e 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -117,6 +117,9 @@ def test_minimum(self, dtype, lhs, rhs):
     def test_multiply(self, dtype, lhs, rhs):
         self._test_mathematical('multiply', dtype, lhs, rhs)
 
+    def test_remainder(self, dtype, lhs, rhs):  # same shared driver as the sibling binary-op tests
+        self._test_mathematical('remainder', dtype, lhs, rhs)
+
     def test_power(self, dtype, lhs, rhs):
         self._test_mathematical('power', dtype, lhs, rhs)
 

From a99ae7d012a0d3901b614d2443512ac6666442d1 Mon Sep 17 00:00:00 2001
From: KsanaKozlova <oksana.kozlova@intel.com>
Date: Tue, 17 Aug 2021 04:16:00 -0500
Subject: [PATCH 3/3] use dpnp descriptors in remainder python layer, rename fmod local

---
 .../kernels/dpnp_krnl_mathematical.cpp        |  6 ++--
 dpnp/dpnp_iface_mathematical.py               | 30 ++++++++++---------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
index 493ea86c94b8..00269c034d66 100644
--- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -378,11 +378,11 @@ void dpnp_remainder_c(void* result_out,
 
     cl::sycl::range<1> gws(result_size); 
     auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
-        const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
+        const size_t i = global_id[0];
         const _DataType_output input1_elem = (*input1_it)[i];
         const _DataType_output input2_elem = (*input2_it)[i];
-        double fmod = cl::sycl::fmod((double)input1_elem, (double)input2_elem);
-        double add = fmod + input2_elem;
+        double fmod_res = cl::sycl::fmod((double)input1_elem, (double)input2_elem);
+        double add = fmod_res + input2_elem;
         result[i] = cl::sycl::fmod(add, (double)input2_elem);
 
     };
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 2c55181ac262..01c7448a39a3 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -1414,25 +1414,27 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
 
     """
 
-    x1_is_scalar, x2_is_scalar = dpnp.isscalar(x1), dpnp.isscalar(x2)
-    x1_is_dparray, x2_is_dparray = isinstance(x1, dparray), isinstance(x2, dparray)
-    
-    if not use_origin_backend(x1) and not kwargs:
-        if not x1_is_dparray and not x1_is_scalar:
+    x1_is_scalar = dpnp.isscalar(x1)
+    x2_is_scalar = dpnp.isscalar(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x2_desc = dpnp.get_dpnp_descriptor(x2)
+
+    if x1_desc and x2_desc and not kwargs:
+        if not x1_desc and not x1_is_scalar:
             pass
-        elif not x2_is_dparray and not x2_is_scalar:
+        elif not x2_desc and not x2_is_scalar:
             pass
         elif x1_is_scalar and x2_is_scalar:
             pass
-        elif x1_is_dparray and x1.ndim == 0:
+        elif x1_desc and x1_desc.ndim == 0:
             pass
-        elif x2_is_dparray and x2.ndim == 0:
+        elif x2_desc and x2_desc.ndim == 0:
             pass
-        elif x2_is_scalar and x2 == 0:
+        elif x2_is_scalar and not x2_desc:
             pass
-        elif x1_is_dparray and x2_is_dparray and x1.size != x2.size:
+        elif x1_desc and x2_desc and x1_desc.size != x2_desc.size:
             pass
-        elif x1_is_dparray and x2_is_dparray and x1.shape != x2.shape:
+        elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape:
             pass
         elif out is not None and not isinstance(out, dparray):
             pass
@@ -1442,12 +1444,12 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
             pass
         elif not where:
             pass
-        elif x1_is_scalar and x2.ndim > 1:
+        elif x1_is_scalar and x2_desc.ndim > 1:
             pass
         else:
-            return dpnp_remainder(x1, x2, out=out, where=where, dtype=dtype)
+            return dpnp_remainder(x1_desc, x2_desc, out=out, where=where, dtype=dtype)
 
-    return call_origin(numpy.remainder, x1, x2, out=out, where=where, dtype=dtype **kwargs)
+    return call_origin(numpy.remainder, x1, x2, out=out, where=where, dtype=dtype, **kwargs)
 
 
 def round_(a, decimals=0, out=None):