From 32c234e8c773daf78dddca8c99543c38c8731bbc Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Fri, 11 Nov 2022 20:50:41 +0100
Subject: [PATCH 1/7] Merge changes from master to gold (#1232)

* Decrease array size for stability (#1227)

* dpnp_array must expose usm_type (#1228)

* Leave is_host() call with old compiler only (#1224)

* Leave is_host() call with old compiler only

* Incremented 2023-SWITCHOVER timestamp

* Exclude running upload steps for fork repo (#1230)

* Setting version to 0.11.0 (#1231)
---
 .github/workflows/conda-package.yml       |  2 +-
 doc/conf.py                               |  4 ++--
 dpnp/backend/CMakeLists.txt               |  4 ++--
 dpnp/backend/doc/Doxyfile                 |  2 +-
 dpnp/backend/src/dpnp_utils.hpp           | 11 +++++++++-
 dpnp/backend/src/dpnpc_memory_adapter.hpp |  5 ++++-
 dpnp/backend/src/queue_sycl.cpp           |  9 ++++++--
 dpnp/dpnp_array.py                        |  4 ++++
 dpnp/version.py                           |  2 +-
 tests/test_arraycreation.py               | 13 +-----------
 tests/test_random_state.py                | 26 ++++++++++++++---------
 tests/test_sycl_queue.py                  |  6 ++----
 12 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 870d52f1d778..89376d4bb262 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -456,7 +456,7 @@ jobs:
     needs: test_windows
 
     if: |
-      !github.event.pull_request.head.repo.fork  &&
+      !github.event.pull_request.head.repo.fork  && !github.event.push.repository.fork &&
       (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/'))
 
     runs-on: windows-latest
diff --git a/doc/conf.py b/doc/conf.py
index a3d1cd5798b9..46505fa8f6db 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -31,9 +31,9 @@
 author = 'Intel'
 
 # The short X.Y version
-version = '0.10'
+version = '0.11'
 # The full version, including alpha/beta/rc tags
-release = '0.10.3'
+release = '0.11.0'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 62456a270a96..409a29f8a310 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -27,8 +27,8 @@
 
 cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
-# set(DPNP_VERSION 0.10.3)
-# set(DPNP_API_VERSION 0.10)
+# set(DPNP_VERSION 0.11.0)
+# set(DPNP_API_VERSION 0.11)
 
 # set directory where the custom finders live
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules")
diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile
index 8e9b4e3d5ec1..6c83bb0e8465 100644
--- a/dpnp/backend/doc/Doxyfile
+++ b/dpnp/backend/doc/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "DPNP C++ backend kernel library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 0.10.3
+PROJECT_NUMBER         = 0.11.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp
index f84dd3a74721..ec6a9c511c88 100644
--- a/dpnp/backend/src/dpnp_utils.hpp
+++ b/dpnp/backend/src/dpnp_utils.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2022, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,15 @@
     (__LIBSYCL_MAJOR_VERSION > major) || (__LIBSYCL_MAJOR_VERSION == major and __LIBSYCL_MINOR_VERSION > minor) ||     \
         (__LIBSYCL_MAJOR_VERSION == major and __LIBSYCL_MINOR_VERSION == minor and __LIBSYCL_PATCH_VERSION >= patch)
 
+/**
+ * Version of SYCL DPC++ 2023 compiler at which transition to SYCL 2020 occurs.
+ * Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and
+ * 20221101L on Windows.
+ */
+#ifndef __SYCL_COMPILER_2023_SWITCHOVER
+#define __SYCL_COMPILER_2023_SWITCHOVER 20221102L
+#endif
+
 /**
  * @defgroup BACKEND_UTILS Backend C++ library utilities
  * @{
diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp
index 3b07795ed5f6..dab09622a698 100644
--- a/dpnp/backend/src/dpnpc_memory_adapter.hpp
+++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2022, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,7 @@
 #define DPNP_MEMORY_ADAPTER_H
 
 #include "queue_sycl.hpp"
+#include "dpnp_utils.hpp"
 
 /**
  * @ingroup BACKEND_UTILS
@@ -84,8 +85,10 @@ class DPNPC_ptr_adapter final
             std::cerr << "\n\t size_in_bytes=" << size_in_bytes;
             std::cerr << "\n\t pointer type=" << (long)src_ptr_type;
             std::cerr << "\n\t queue inorder=" << queue.is_in_order();
+#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER)
             std::cerr << "\n\t queue is_host=" << queue.is_host();
             std::cerr << "\n\t queue device is_host=" << queue.get_device().is_host();
+#endif
             std::cerr << "\n\t queue device is_cpu=" << queue.get_device().is_cpu();
             std::cerr << "\n\t queue device is_gpu=" << queue.get_device().is_gpu();
             std::cerr << "\n\t queue device is_accelerator=" << queue.get_device().is_accelerator();
diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp
index 40bbd886f9dd..0810ed0aaba8 100644
--- a/dpnp/backend/src/queue_sycl.cpp
+++ b/dpnp/backend/src/queue_sycl.cpp
@@ -29,6 +29,7 @@
 
 #include <dpnp_iface.hpp>
 #include "queue_sycl.hpp"
+#include "dpnp_utils.hpp"
 
 #if defined(DPNP_LOCAL_QUEUE)
 sycl::queue* backend_sycl::queue = nullptr;
@@ -211,10 +212,14 @@ bool backend_sycl::backend_sycl_is_cpu()
 {
     sycl::queue& qptr = get_queue();
 
-    if (qptr.is_host() || qptr.get_device().is_cpu() || qptr.get_device().is_host())
-    {
+    if (qptr.get_device().is_cpu()) {
+        return true;
+    }
+#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER)
+    else if (qptr.is_host() || qptr.get_device().is_host()) {
         return true;
     }
+#endif
 
     return false;
 }
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 1ac50e12c381..c3d35ab0e729 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -115,6 +115,10 @@ def sycl_context(self):
     def device(self):
         return self._array_obj.device
 
+    @property
+    def usm_type(self):
+        return self._array_obj.usm_type
+
     def __abs__(self):
         return dpnp.abs(self)
 
diff --git a/dpnp/version.py b/dpnp/version.py
index 8d87f84a961c..160e8ec963a8 100644
--- a/dpnp/version.py
+++ b/dpnp/version.py
@@ -29,6 +29,6 @@
 DPNP version module
 """
 
-__version__: str = '0.10.3'
+__version__: str = '0.11.0'
 
 version: str = __version__
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index 01abce88a504..e9f703d783dc 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -12,7 +12,7 @@
                          [0, -5, 10, -2.5, 9.7],
                          ids=['0', '-5', '10', '-2.5', '9.7'])
 @pytest.mark.parametrize("stop",
-                         [None, 10, -2, 20.5, 10**5],
+                         [None, 10, -2, 20.5, 1000],
                          ids=['None', '10', '-2', '20.5', '10**5'])
 @pytest.mark.parametrize("step",
                          [None, 1, 2.7, -1.6, 100],
@@ -26,17 +26,6 @@ def test_arange(start, stop, step, dtype):
         # numpy casts to float32 type when computes float16 data
         rtol_mult = 4
 
-        # secure there is no 'inf' elements in resulting array
-        max = numpy.finfo(dtype).max
-        if stop is not None and stop > max:
-            # consider comulative accuracy while generating array
-            # to calculate maximum allowed 'stop' value for dtype=float16
-            arr_len = (max - start) / (step if step is not None else 1)
-            arr_ilen = int(arr_len)
-            arr_len = (arr_ilen + 1) if float(arr_ilen) < arr_len else arr_ilen
-            acc = rtol_mult * numpy.finfo(dtype).eps
-            stop = max - acc * arr_len
-
     exp_array = numpy.arange(start, stop=stop, step=step, dtype=dtype)
 
     dpnp_array = dpnp.arange(start, stop=stop, step=step, dtype=dtype)
diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index c09b5c17a880..9d2f14643c84 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -15,6 +15,12 @@
 )
 
 
+def assert_cfd(data, exp_sycl_queue, exp_usm_type=None):
+    assert exp_sycl_queue == data.sycl_queue
+    if exp_usm_type:
+        assert exp_usm_type == data.usm_type
+
+
 class TestNormal:
     @pytest.mark.parametrize("dtype",
                              [dpnp.float32, dpnp.float64, None],
@@ -47,7 +53,7 @@ def test_distr(self, dtype, usm_type):
         assert_array_almost_equal(dpnp.asnumpy(data), desired, decimal=precision)
 
         # check if compute follows data isn't broken
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
 
     @pytest.mark.parametrize("dtype",
@@ -138,7 +144,7 @@ def test_fallback(self, loc, scale):
         assert_array_almost_equal(actual, desired, decimal=precision)
 
         # check if compute follows data isn't broken
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue)
 
 
     @pytest.mark.parametrize("dtype",
@@ -174,17 +180,17 @@ def test_distr(self, usm_type):
 
         precision = numpy.finfo(dtype=numpy.float64).precision
         assert_array_almost_equal(dpnp.asnumpy(data), desired, decimal=precision)
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
         # call with the same seed has to draw the same values
         data = RandomState(seed, sycl_queue=sycl_queue).rand(3, 2, usm_type=usm_type)
         assert_array_almost_equal(dpnp.asnumpy(data), desired, decimal=precision)
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
         # call with omitted dimensions has to draw the first element from desired
         data = RandomState(seed, sycl_queue=sycl_queue).rand(usm_type=usm_type)
         assert_array_almost_equal(dpnp.asnumpy(data), desired[0, 0], decimal=precision)
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
         # rand() is an alias on random_sample(), map arguments
         with mock.patch('dpnp.random.RandomState.random_sample') as m:
@@ -245,7 +251,7 @@ def test_distr(self, dtype, usm_type):
                                [5, 3],
                                [5, 7]], dtype=numpy.int32)
         assert_array_equal(dpnp.asnumpy(data), desired)
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
         # call with the same seed has to draw the same values
         data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low,
@@ -254,7 +260,7 @@ def test_distr(self, dtype, usm_type):
                                                                 dtype=dtype,
                                                                 usm_type=usm_type)
         assert_array_equal(dpnp.asnumpy(data), desired)
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
         # call with omitted dimensions has to draw the first element from desired
         data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low,
@@ -262,7 +268,7 @@ def test_distr(self, dtype, usm_type):
                                                                 dtype=dtype,
                                                                 usm_type=usm_type)
         assert_array_equal(dpnp.asnumpy(data), desired[0, 0])
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
         # rand() is an alias on random_sample(), map arguments
         with mock.patch('dpnp.random.RandomState.uniform') as m:
@@ -701,7 +707,7 @@ def test_distr(self, bounds, dtype, usm_type):
             assert_array_equal(dpnp.asnumpy(data), desired)
 
         # check if compute follows data isn't broken
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue, usm_type)
 
 
     @pytest.mark.parametrize("dtype",
@@ -766,7 +772,7 @@ def test_fallback(self, low, high):
         assert_array_almost_equal(actual, desired, decimal=precision)
 
         # check if compute follows data isn't broken
-        assert sycl_queue == data.sycl_queue
+        assert_cfd(data, sycl_queue)
 
 
     @pytest.mark.parametrize("dtype",
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index e1d902588afe..e3e8680e6aca 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -278,8 +278,7 @@ def test_uniform(usm_type, size):
     high = 2.0
     res = dpnp.random.uniform(low, high, size=size, usm_type=usm_type)
 
-    res_usm_type = res.get_array().usm_type
-    assert usm_type == res_usm_type
+    assert usm_type == res.usm_type
 
 
 @pytest.mark.parametrize("usm_type",
@@ -295,8 +294,7 @@ def test_rs_uniform(usm_type, seed):
     rs = dpnp.random.RandomState(seed, sycl_queue=sycl_queue)
     res = rs.uniform(low, high, usm_type=usm_type)
 
-    res_usm_type = res.get_array().usm_type
-    assert usm_type == res_usm_type
+    assert usm_type == res.usm_type
 
     res_sycl_queue = res.get_array().sycl_queue
     assert_sycl_queue_equal(res_sycl_queue, sycl_queue)

From c7178adaa27bbfdb0c20b9697903ef1da3aed5a7 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Thu, 5 Jan 2023 23:24:24 +0100
Subject: [PATCH 2/7] Add missing content upon the last merge from master
 (#1267)

---
 dpnp/config.py                                |  5 ++
 dpnp/dpnp_utils/dpnp_algo_utils.pyx           |  8 ++++
 dpnp/random/dpnp_iface_random.py              |  2 +-
 dpnp/random/dpnp_random_state.py              | 10 ++--
 tests/conftest.py                             |  4 ++
 tests/test_amin_amax.py                       |  2 +
 tests/test_arithmetic.py                      |  3 ++
 tests/test_arraycreation.py                   |  2 -
 tests/test_arraymanipulation.py               | 11 +++++
 tests/test_bitwise.py                         |  6 +++
 tests/test_histograms.py                      |  4 ++
 tests/test_indexing.py                        |  9 ++++
 tests/test_linalg.py                          |  6 +++
 tests/test_logic.py                           |  5 ++
 tests/test_manipulation.py                    |  2 +
 tests/test_mathematical.py                    | 48 +++++++++----------
 tests/test_random.py                          | 30 ++++++++++++
 tests/test_random_state.py                    |  9 ++++
 tests/test_sort.py                            |  1 +
 tests/test_statistics.py                      |  3 ++
 tests/test_strides.py                         |  2 +
 tests/test_sycl_queue.py                      |  3 ++
 tests/test_umath.py                           |  1 +
 .../core_tests/test_ndarray_conversion.py     |  1 +
 .../core_tests/test_ndarray_copy_and_view.py  |  2 +
 .../cupy/core_tests/test_ndarray_math.py      |  2 +
 .../cupy/core_tests/test_ndarray_reduction.py |  2 +
 .../cupy/creation_tests/test_from_data.py     |  1 +
 .../cupy/creation_tests/test_matrix.py        |  8 ++++
 .../cupy/creation_tests/test_ranges.py        |  8 ++++
 tests/third_party/cupy/fft_tests/test_fft.py  |  9 ++++
 .../cupy/indexing_tests/test_generate.py      |  1 +
 .../cupy/indexing_tests/test_indexing.py      |  8 ++++
 .../cupy/indexing_tests/test_insert.py        |  6 +++
 .../cupy/linalg_tests/test_einsum.py          |  8 ++++
 .../cupy/linalg_tests/test_product.py         | 13 +++++
 .../cupy/logic_tests/test_comparison.py       |  4 ++
 .../cupy/logic_tests/test_content.py          |  4 ++
 .../third_party/cupy/logic_tests/test_ops.py  |  5 ++
 .../cupy/manipulation_tests/test_basic.py     |  4 ++
 .../cupy/manipulation_tests/test_dims.py      |  4 ++
 .../cupy/manipulation_tests/test_tiling.py    |  4 ++
 .../cupy/manipulation_tests/test_transpose.py |  9 ++++
 .../cupy/math_tests/test_arithmetic.py        |  4 ++
 .../cupy/math_tests/test_matmul.py            |  3 ++
 .../cupy/math_tests/test_rounding.py          |  2 +
 .../cupy/math_tests/test_sumprod.py           | 11 +++++
 .../cupy/random_tests/test_distributions.py   | 25 ++++++++++
 .../cupy/random_tests/test_sample.py          |  8 ++++
 .../cupy/sorting_tests/test_search.py         | 18 +++++++
 .../cupy/sorting_tests/test_sort.py           | 10 ++++
 .../cupy/statistics_tests/test_correlation.py |  6 +++
 .../cupy/statistics_tests/test_histogram.py   |  1 +
 .../cupy/statistics_tests/test_meanvar.py     | 21 ++++++++
 54 files changed, 355 insertions(+), 33 deletions(-)

diff --git a/dpnp/config.py b/dpnp/config.py
index 9298994a8421..58a35f88a6e4 100644
--- a/dpnp/config.py
+++ b/dpnp/config.py
@@ -47,3 +47,8 @@
 '''
 Explicitly use SYCL shared memory parameter in DPCtl array constructor for creation functions
 '''
+
+__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__ = int(os.getenv('DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK', 1))
+'''
+Raise a NotImplementedError when DPNP would fall back on the NumPy implementation
+'''
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index 09b46609bbd9..c09bef8ec485 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -126,6 +126,14 @@ def call_origin(function, *args, **kwargs):
     Call fallback function for unsupported cases
     """
 
+    allow_fallback = kwargs.pop("allow_fallback", False)
+
+    if not allow_fallback and config.__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__ == 1:
+        raise NotImplementedError(f"Requested funtion={function.__name__} with args={args} and kwargs={kwargs} "
+                                   "isn't currently supported and would fall back on NumPy implementation. "
+                                   "Define enviroment variable `DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK` to `0` "
+                                   "if the fall back is required to be supported without rasing an exception.")
+
     dpnp_inplace = kwargs.pop("dpnp_inplace", False)
     sycl_queue = kwargs.pop("sycl_queue", None)
     # print(f"DPNP call_origin(): Fallback called. \n\t function={function}, \n\t args={args}, \n\t kwargs={kwargs}, \n\t dpnp_inplace={dpnp_inplace}")
diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py
index c71cad80ae4a..677f2a7e94bc 100644
--- a/dpnp/random/dpnp_iface_random.py
+++ b/dpnp/random/dpnp_iface_random.py
@@ -1314,7 +1314,7 @@ def seed(seed=None):
             dpnp_rng_srand(seed)
 
     # always reseed numpy engine also
-    return call_origin(numpy.random.seed, seed)
+    return call_origin(numpy.random.seed, seed, allow_fallback=True)
 
 
 def standard_cauchy(size=None):
diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py
index f58d84e8b6bb..1d4648c31c47 100644
--- a/dpnp/random/dpnp_random_state.py
+++ b/dpnp/random/dpnp_random_state.py
@@ -345,9 +345,10 @@ def randint(self, low, high=None, size=None, dtype=int, usm_type="device"):
 
                     min_int = numpy.iinfo('int32').min
                     max_int = numpy.iinfo('int32').max
-                    if not dpnp.isfinite(low) or low > max_int or low < min_int:
+
+                    if not self._is_finite_scalar(low) or low > max_int or low < min_int:
                         raise OverflowError(f"Range of low={low} exceeds valid bounds")
-                    elif not dpnp.isfinite(high) or high > max_int or high < min_int:
+                    elif not self._is_finite_scalar(high) or high > max_int or high < min_int:
                         raise OverflowError(f"Range of high={high} exceeds valid bounds")
 
                     low = int(low)
@@ -547,9 +548,10 @@ def uniform(self, low=0.0, high=1.0, size=None, dtype=None, usm_type="device"):
             else:
                 min_double = numpy.finfo('double').min
                 max_double = numpy.finfo('double').max
-                if not dpnp.isfinite(low) or low >= max_double or low <= min_double:
+
+                if not self._is_finite_scalar(low) or low >= max_double or low <= min_double:
                     raise OverflowError(f"Range of low={low} exceeds valid bounds")
-                elif not dpnp.isfinite(high) or high >= max_double or high <= min_double:
+                elif not self._is_finite_scalar(high) or high >= max_double or high <= min_double:
                     raise OverflowError(f"Range of high={high} exceeds valid bounds")
 
                 if low > high:
diff --git a/tests/conftest.py b/tests/conftest.py
index d9cbbb593e36..78d3180bac08 100755
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -73,3 +73,7 @@ def pytest_collection_modifyitems(config, items):
             # exact match of the test name with items from excluded_list
             if test_name == item_tbl_str:
                 item.add_marker(skip_mark)
+
+@pytest.fixture
+def allow_fall_back_on_numpy(monkeypatch):
+    monkeypatch.setattr(dpnp.config, '__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__', 0)
diff --git a/tests/test_amin_amax.py b/tests/test_amin_amax.py
index 90c92138e81e..442690cc15d3 100644
--- a/tests/test_amin_amax.py
+++ b/tests/test_amin_amax.py
@@ -67,6 +67,7 @@ def _get_min_max_input(type, shape):
     return a.reshape(shape)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("type",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=['float64', 'float32', 'int64', 'int32'])
@@ -87,6 +88,7 @@ def test_amax(type, shape):
     numpy.testing.assert_array_equal(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("type",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=['float64', 'float32', 'int64', 'int32'])
diff --git a/tests/test_arithmetic.py b/tests/test_arithmetic.py
index 6eb635e57730..aacb749ccc83 100644
--- a/tests/test_arithmetic.py
+++ b/tests/test_arithmetic.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 
 from tests.third_party.cupy import testing
 
@@ -21,12 +22,14 @@ def test_modf_part2(self, xp, dtype):
 
         return c
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_float_dtypes()
     @testing.numpy_cupy_allclose()
     def test_nanprod(self, xp, dtype):
         a = xp.array([-2.5, -1.5, xp.nan, 10.5, 1.5, xp.nan], dtype=dtype)
         return xp.nanprod(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_float_dtypes()
     @testing.numpy_cupy_allclose()
     def test_nansum(self, xp, dtype):
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index ac2eca1e1ed1..5bb9795bbac8 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -258,8 +258,6 @@ def test_identity(n, dtype):
 
     assert_array_equal(func(numpy), func(dpnp))
 
-    assert_array_equal(func(numpy), func(dpnp))
-
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("dtype",
diff --git a/tests/test_arraymanipulation.py b/tests/test_arraymanipulation.py
index 9b06bf9596d3..c0cd3e6c2b1e 100644
--- a/tests/test_arraymanipulation.py
+++ b/tests/test_arraymanipulation.py
@@ -4,6 +4,7 @@
 import numpy
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("dtype",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=["float64", "float32", "int64", "int32"])
@@ -30,6 +31,7 @@ def test_asfarray2(dtype, data):
     numpy.testing.assert_array_equal(result, expected)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestConcatenate:
     def test_returns_copy(self):
         a = dpnp.array(numpy.eye(3))
@@ -91,9 +93,11 @@ class TestHstack:
     def test_non_iterable(self):
         numpy.testing.assert_raises(TypeError, dpnp.hstack, 1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_empty_input(self):
         numpy.testing.assert_raises(ValueError, dpnp.hstack, ())
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_0D_array(self):
         b = dpnp.array(2)
         a = dpnp.array(1)
@@ -101,6 +105,7 @@ def test_0D_array(self):
         desired = dpnp.array([1, 2])
         numpy.testing.assert_array_equal(res, desired)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_1D_array(self):
         a = dpnp.array([1])
         b = dpnp.array([2])
@@ -108,6 +113,7 @@ def test_1D_array(self):
         desired = dpnp.array([1, 2])
         numpy.testing.assert_array_equal(res, desired)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array(self):
         a = dpnp.array([[1], [2]])
         b = dpnp.array([[1], [2]])
@@ -126,9 +132,11 @@ class TestVstack:
     def test_non_iterable(self):
         numpy.testing.assert_raises(TypeError, dpnp.vstack, 1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_empty_input(self):
         numpy.testing.assert_raises(ValueError, dpnp.vstack, ())
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_0D_array(self):
         a = dpnp.array(1)
         b = dpnp.array(2)
@@ -136,6 +144,7 @@ def test_0D_array(self):
         desired = dpnp.array([[1], [2]])
         numpy.testing.assert_array_equal(res, desired)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_1D_array(self):
         a = dpnp.array([1])
         b = dpnp.array([2])
@@ -143,6 +152,7 @@ def test_1D_array(self):
         desired = dpnp.array([[1], [2]])
         numpy.testing.assert_array_equal(res, desired)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array(self):
         a = dpnp.array([[1], [2]])
         b = dpnp.array([[1], [2]])
@@ -150,6 +160,7 @@ def test_2D_array(self):
         desired = dpnp.array([[1], [2], [1], [2]])
         numpy.testing.assert_array_equal(res, desired)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array2(self):
         a = dpnp.array([1, 2])
         b = dpnp.array([1, 2])
diff --git a/tests/test_bitwise.py b/tests/test_bitwise.py
index 8b21bcd2644a..645ae4556c1c 100644
--- a/tests/test_bitwise.py
+++ b/tests/test_bitwise.py
@@ -37,20 +37,26 @@ def _test_binary_int(self, name, lhs, rhs, dtype):
 
         numpy.testing.assert_array_equal(result, expected)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_and(self, lhs, rhs, dtype):
         self._test_binary_int('bitwise_and', lhs, rhs, dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_or(self, lhs, rhs, dtype):
         self._test_binary_int('bitwise_or', lhs, rhs, dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_xor(self, lhs, rhs, dtype):
         self._test_binary_int('bitwise_xor', lhs, rhs, dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invert(self, lhs, rhs, dtype):
         self._test_unary_int('invert', lhs, dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_left_shift(self, lhs, rhs, dtype):
         self._test_binary_int('left_shift', lhs, rhs, dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_right_shift(self, lhs, rhs, dtype):
         self._test_binary_int('right_shift', lhs, rhs, dtype)
diff --git a/tests/test_histograms.py b/tests/test_histograms.py
index 825a407d47d3..b95b1712408e 100644
--- a/tests/test_histograms.py
+++ b/tests/test_histograms.py
@@ -13,6 +13,7 @@ def setup(self):
     def teardown(self):
         pass
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_simple(self):
         n = 100
         v = dpnp.random.rand(n)
@@ -24,6 +25,7 @@ def test_simple(self):
         (a, b) = dpnp.histogram(numpy.linspace(0, 10, 100))
         numpy.testing.assert_array_equal(a, 10)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_one_bin(self):
         # Ticket 632
         hist, edges = dpnp.histogram([1, 2, 3, 4], [1, 2])
@@ -66,6 +68,8 @@ def test_density(self):
             [1, 2, 3, 4], [0.5, 1.5, numpy.inf], density=True)
         numpy.testing.assert_equal(counts, [.25, 0])
 
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_arr_weights_mismatch(self):
         a = dpnp.arange(10) + .5
         w = dpnp.arange(11) + .5
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 6519576171d0..091cf1345c4e 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -5,6 +5,7 @@
 import numpy
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_choose():
     a = numpy.r_[:4]
     ia = dpnp.array(a)
@@ -109,6 +110,7 @@ def test_nonzero(array):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("vals",
                          [[100, 200],
                           (100, 200)],
@@ -138,6 +140,7 @@ def test_place1(arr, mask, vals):
     numpy.testing.assert_array_equal(a, ia)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("vals",
                          [[100, 200],
                           [100, 200, 300, 400, 500, 600],
@@ -161,6 +164,7 @@ def test_place2(arr, mask, vals):
     numpy.testing.assert_array_equal(a, ia)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("vals",
                          [[100, 200],
                           [100, 200, 300, 400, 500, 600],
@@ -243,6 +247,7 @@ def test_put3():
     numpy.testing.assert_array_equal(a, ia)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_put_along_axis_val_int():
     a = numpy.arange(16).reshape(4, 4)
     ai = dpnp.array(a)
@@ -254,6 +259,7 @@ def test_put_along_axis_val_int():
         numpy.testing.assert_array_equal(a, ai)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_put_along_axis1():
     a = numpy.arange(64).reshape(4, 4, 4)
     ai = dpnp.array(a)
@@ -265,6 +271,7 @@ def test_put_along_axis1():
         numpy.testing.assert_array_equal(a, ai)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_put_along_axis2():
     a = numpy.arange(64).reshape(4, 4, 4)
     ai = dpnp.array(a)
@@ -411,6 +418,7 @@ def test_take(array, indices, array_type, indices_type):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_take_along_axis():
     a = numpy.arange(16).reshape(4, 4)
     ai = dpnp.array(a)
@@ -422,6 +430,7 @@ def test_take_along_axis():
         numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_take_along_axis1():
     a = numpy.arange(64).reshape(4, 4, 4)
     ai = dpnp.array(a)
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
index b9535a7b274f..dd89a18adbd6 100644
--- a/tests/test_linalg.py
+++ b/tests/test_linalg.py
@@ -85,6 +85,7 @@ def test_det(array):
     numpy.testing.assert_allclose(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("type",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=['float64', 'float32', 'int64', 'int32'])
@@ -179,6 +180,7 @@ def test_matrix_rank(type, tol, array):
     numpy.testing.assert_allclose(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array",
                          [[7], [1, 2], [1, 0]],
                          ids=['[7]', '[1, 2]', '[1, 0]'])
@@ -196,6 +198,7 @@ def test_norm1(array, ord, axis):
     numpy.testing.assert_allclose(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array",
                          [[[1, 0]], [[1, 2]], [[1, 0], [3, 0]], [[1, 2], [3, 4]]],
                          ids=['[[1, 0]]', '[[1, 2]]', '[[1, 0], [3, 0]]', '[[1, 2], [3, 4]]'])
@@ -213,6 +216,7 @@ def test_norm2(array, ord, axis):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array",
                          [[[[1, 2], [3, 4]], [[5, 6], [7, 8]]], [[[1, 0], [3, 0]], [[5, 0], [7, 0]]]],
                          ids=['[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]', '[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]'])
@@ -230,6 +234,7 @@ def test_norm3(array, ord, axis):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("type",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=['float64', 'float32', 'int64', 'int32'])
@@ -273,6 +278,7 @@ def test_qr(type, shape, mode):
     numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("type",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=['float64', 'float32', 'int64', 'int32'])
diff --git a/tests/test_logic.py b/tests/test_logic.py
index b826740b9bd9..b3280be07618 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -95,6 +95,7 @@ def test_any(type, shape):
         numpy.testing.assert_allclose(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_greater():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
@@ -104,6 +105,7 @@ def test_greater():
         numpy.testing.assert_equal(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_greater_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
@@ -113,6 +115,7 @@ def test_greater_equal():
         numpy.testing.assert_equal(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_less():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
@@ -122,6 +125,7 @@ def test_less():
         numpy.testing.assert_equal(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_less_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
@@ -131,6 +135,7 @@ def test_less_equal():
         numpy.testing.assert_equal(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_not_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py
index 8130fcdb4e86..bb91f5d0d500 100644
--- a/tests/test_manipulation.py
+++ b/tests/test_manipulation.py
@@ -23,6 +23,7 @@ def test_copyto_dtype(in_obj, out_dtype):
     numpy.testing.assert_array_equal(result, expected)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("arr",
                          [[], [1, 2, 3, 4], [[1, 2], [3, 4]], [[[1], [2]], [[3], [4]]]],
                          ids=['[]', '[1, 2, 3, 4]', '[[1, 2], [3, 4]]', '[[[1], [2]], [[3], [4]]]'])
@@ -34,6 +35,7 @@ def test_repeat(arr):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array",
                          [[1, 2, 3],
                           [1, 2, 2, 1, 2, 4],
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index c74a96a07f5f..21071bec41e9 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -5,6 +5,7 @@
 import numpy
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestConvolve:
     def test_object(self):
         d = [1.] * 100
@@ -33,6 +34,7 @@ def test_mode(self):
             dpnp.convolve(d, k, mode=None)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array",
                          [[[0, 0], [0, 0]],
                           [[1, 2], [1, 2]],
@@ -98,42 +100,54 @@ def _test_mathematical(self, name, dtype, lhs, rhs):
 
         numpy.testing.assert_allclose(result, expected, atol=1e-4)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_add(self, dtype, lhs, rhs):
         self._test_mathematical('add', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_arctan2(self, dtype, lhs, rhs):
         self._test_mathematical('arctan2', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_copysign(self, dtype, lhs, rhs):
         self._test_mathematical('copysign', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_divide(self, dtype, lhs, rhs):
         self._test_mathematical('divide', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_fmod(self, dtype, lhs, rhs):
         self._test_mathematical('fmod', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_floor_divide(self, dtype, lhs, rhs):
         self._test_mathematical('floor_divide', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_hypot(self, dtype, lhs, rhs):
         self._test_mathematical('hypot', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_maximum(self, dtype, lhs, rhs):
         self._test_mathematical('maximum', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_minimum(self, dtype, lhs, rhs):
         self._test_mathematical('minimum', dtype, lhs, rhs)
 
     def test_multiply(self, dtype, lhs, rhs):
         self._test_mathematical('multiply', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_remainder(self, dtype, lhs, rhs):
         self._test_mathematical('remainder', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_power(self, dtype, lhs, rhs):
         self._test_mathematical('power', dtype, lhs, rhs)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_subtract(self, dtype, lhs, rhs):
         self._test_mathematical('subtract', dtype, lhs, rhs)
 
@@ -187,6 +201,7 @@ def test_multiply_scalar2(shape, dtype):
     numpy.testing.assert_array_equal(result, expected)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5],
                                    [1, 2, numpy.nan, 4, 5],
                                    [[1, 2, numpy.nan], [3, -4, -5]]])
@@ -199,6 +214,7 @@ def test_nancumprod(array):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5],
                                    [1, 2, numpy.nan, 4, 5],
                                    [[1, 2, numpy.nan], [3, -4, -5]]])
@@ -226,6 +242,7 @@ def test_negative(data, dtype):
     numpy.testing.assert_array_equal(result, expected)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("val_type",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
@@ -270,6 +287,8 @@ def test_ediff1d_int(self, array, data_type):
         expected = numpy.ediff1d(np_a)
         numpy.testing.assert_array_equal(expected, result)
 
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_ediff1d_args(self):
         np_a = numpy.array([1, 2, 4, 7, 0])
 
@@ -281,6 +300,7 @@ def test_ediff1d_args(self):
         numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestTrapz:
     @pytest.mark.parametrize("data_type",
                              [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
@@ -336,6 +356,7 @@ def test_trapz_with_dx_params(self, y_array, dx):
         numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestCross:
 
     @pytest.mark.parametrize("axis", [None, 0],
@@ -370,32 +391,7 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis):
         numpy.testing.assert_array_equal(expected, result)
 
 
-class TestGradient:
-
-    @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
-                                       [3., 4., 7.5, 9.],
-                                       [2, 6, 8, 10]])
-    def test_gradient_y1(self, array):
-        np_y = numpy.array(array)
-        dpnp_y = dpnp.array(array)
-
-        result = dpnp.gradient(dpnp_y)
-        expected = numpy.gradient(np_y)
-        numpy.testing.assert_array_equal(expected, result)
-
-    @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
-                                       [3., 4., 7.5, 9.],
-                                       [2, 6, 8, 10]])
-    @pytest.mark.parametrize("dx", [2, 3.5])
-    def test_gradient_y1_dx(self, array, dx):
-        np_y = numpy.array(array)
-        dpnp_y = dpnp.array(array)
-
-        result = dpnp.gradient(dpnp_y, dx)
-        expected = numpy.gradient(np_y, dx)
-        numpy.testing.assert_array_equal(expected, result)
-
-
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestGradient:
 
     @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
diff --git a/tests/test_random.py b/tests/test_random.py
index fa7da686065f..54cb2fa3a4d7 100644
--- a/tests/test_random.py
+++ b/tests/test_random.py
@@ -75,6 +75,7 @@ def test_input_shape(func):
     assert shape == res.shape
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("func",
                          [dpnp.random.random,
                           dpnp.random.random_sample,
@@ -139,6 +140,7 @@ def test_randn_normal_distribution():
     assert math.isclose(mean, expected_mean, abs_tol=0.03)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsBeta(TestDistribution):
 
     def test_moments(self):
@@ -162,6 +164,7 @@ def test_seed(self):
         self.check_seed('beta', {'a': a, 'b': b})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsBinomial(TestDistribution):
 
     def test_extreme_value(self):
@@ -199,6 +202,7 @@ def test_seed(self):
         self.check_seed('binomial', {'n': n, 'p': p})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsChisquare(TestDistribution):
 
     def test_invalid_args(self):
@@ -210,6 +214,7 @@ def test_seed(self):
         self.check_seed('chisquare', {'df': df})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsExponential(TestDistribution):
 
     def test_invalid_args(self):
@@ -221,6 +226,7 @@ def test_seed(self):
         self.check_seed('exponential', {'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsF(TestDistribution):
 
     def test_moments(self):
@@ -248,6 +254,7 @@ def test_seed(self):
         self.check_seed('f', {'dfnum': dfnum, 'dfden': dfden})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsGamma(TestDistribution):
 
     def test_moments(self):
@@ -271,6 +278,7 @@ def test_seed(self):
         self.check_seed('gamma', {'shape': shape})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsGeometric(TestDistribution):
 
     def test_extreme_value(self):
@@ -294,6 +302,7 @@ def test_seed(self):
         self.check_seed('geometric', {'p': p})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsGumbel(TestDistribution):
 
     def test_extreme_value(self):
@@ -323,6 +332,7 @@ def test_seed(self):
         self.check_seed('gumbel', {'loc': loc, 'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsHypergeometric(TestDistribution):
 
     def test_extreme_value(self):
@@ -389,6 +399,7 @@ def test_seed(self):
                         {'ngood': ngood, 'nbad': nbad, 'nsample': nsample})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsLaplace(TestDistribution):
 
     def test_extreme_value(self):
@@ -418,6 +429,7 @@ def test_seed(self):
         self.check_seed('laplace', {'loc': loc, 'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsLogistic(TestDistribution):
 
     def test_moments(self):
@@ -440,6 +452,7 @@ def test_seed(self):
         self.check_seed('logistic', {'loc': loc, 'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsLognormal(TestDistribution):
 
     def test_extreme_value(self):
@@ -468,6 +481,7 @@ def test_seed(self):
         self.check_seed('lognormal', {'mean': mean, 'sigma': sigma})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsMultinomial(TestDistribution):
 
     def test_extreme_value(self):
@@ -514,6 +528,7 @@ def test_seed1(self):
         self.check_seed('multinomial', {'n': n, 'pvals': pvals})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsMultivariateNormal(TestDistribution):
 
     def test_moments(self):
@@ -553,6 +568,7 @@ def test_seed(self):
         self.check_seed('multivariate_normal', {'mean': mean, 'cov': cov})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsNegativeBinomial(TestDistribution):
 
     def test_extreme_value(self):
@@ -609,6 +625,7 @@ def test_seed(self):
         self.check_seed('normal', {'loc': loc, 'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsNoncentralChisquare:
 
     @pytest.mark.parametrize("df", [5.0, 1.0, 0.5], ids=['df_grt_1', 'df_eq_1', 'df_less_1'])
@@ -648,6 +665,7 @@ def test_seed(self, df):
         assert_allclose(a1, a2, rtol=1e-07, atol=0)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsPareto(TestDistribution):
 
     def test_moments(self):
@@ -667,6 +685,7 @@ def test_seed(self):
         self.check_seed('pareto', {'a': a})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsPoisson(TestDistribution):
 
     def test_extreme_value(self):
@@ -689,6 +708,7 @@ def test_seed(self):
         self.check_seed('poisson', {'lam': lam})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsPower(TestDistribution):
 
     def test_moments(self):
@@ -709,6 +729,7 @@ def test_seed(self):
         self.check_seed('power', {'a': a})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsRayleigh(TestDistribution):
 
     def test_extreme_value(self):
@@ -750,6 +771,7 @@ def test_seed(self):
         self.check_seed('standard_exponential', {})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsStandardGamma(TestDistribution):
 
     def test_extreme_value(self):
@@ -782,6 +804,7 @@ def test_seed(self):
         self.check_seed('standard_normal', {})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsStandardT(TestDistribution):
 
     def test_moments(self):
@@ -799,6 +822,7 @@ def test_seed(self):
         self.check_seed('standard_t', {'df': 10.0})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsTriangular(TestDistribution):
 
     def test_moments(self):
@@ -851,6 +875,7 @@ def test_seed(self):
         self.check_seed('uniform', {'low': low, 'high': high})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsVonmises:
 
     @pytest.mark.parametrize("kappa", [5.0, 0.5], ids=['large_kappa', 'small_kappa'])
@@ -887,6 +912,7 @@ def test_seed(self, kappa):
         assert_allclose(a1, a2, rtol=1e-07, atol=0)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsWald(TestDistribution):
 
     def test_moments(self):
@@ -913,6 +939,7 @@ def test_seed(self):
         self.check_seed('wald', {'mean': mean, 'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsWeibull(TestDistribution):
 
     def test_extreme_value(self):
@@ -929,6 +956,7 @@ def test_seed(self):
         self.check_seed('weibull', {'a': a})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestDistributionsZipf(TestDistribution):
 
     def test_invalid_args(self):
@@ -1018,6 +1046,8 @@ def test_shuffle1(self, conv):
         desired = conv(dpnp_1d)
         assert_array_equal(actual, desired)
 
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("conv", [lambda x: x,
                                       lambda x: [(i, i) for i in x]],
                              ids=['lambda x: x',
diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index 95a1ab0d48af..b93f52411c5d 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -146,6 +146,7 @@ def test_extreme_bounds(self):
                                  "with the following message:\n\n%s" % str(e))
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("scale",
                              [dpnp.array([3]), numpy.array([3])],
                              ids=['dpnp.array([3])', 'numpy.array([3])'])
@@ -318,6 +319,7 @@ def test_negative_bounds(self):
         assert_array_equal(actual, desired)
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_negative_interval(self):
         rs = RandomState(3567)
 
@@ -384,6 +386,7 @@ def test_full_range(self):
                                  "with the following message:\n\n%s" % str(e))
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_in_bounds_fuzz(self):
         for high in [4, 8, 16]:
             vals = RandomState().randint(2, high, size=2**16)
@@ -399,6 +402,7 @@ def test_zero_size(self, zero_size):
         assert_equal(RandomState().randint(0, 10, size=zero_size).shape, exp_shape)
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("high",
                              [dpnp.array([3]), numpy.array([3])],
                              ids=['dpnp.array([3])', 'numpy.array([3])'])
@@ -415,6 +419,7 @@ def test_bounds_fallback(self, low, high):
         assert_equal(actual, desired)
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("dtype",
                              [dpnp.int64, dpnp.integer, dpnp.bool, dpnp.bool_, bool],
                              ids=['dpnp.int64', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool'])
@@ -526,6 +531,7 @@ def test_scalar(self, func):
         assert_array_almost_equal(a1, a2, decimal=precision)
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("seed",
                              [range(3),
                               numpy.arange(3, dtype=numpy.int32),
@@ -558,6 +564,7 @@ def test_invalid_type(self, seed):
         assert_raises(TypeError, RandomState, seed)
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("seed",
                              [-1, [-3, 7], (17, 3, -5), [4, 3, 2, 1], (7, 6, 5, 1),
                               range(-1, -11, -1),
@@ -781,6 +788,7 @@ def test_low_high_equal(self, dtype, usm_type):
             assert_array_almost_equal(actual, desired, decimal=numpy.finfo(dtype=dtype).precision)
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_range_bounds(self):
         fmin = numpy.finfo('double').min
         fmax = numpy.finfo('double').max
@@ -796,6 +804,7 @@ def test_range_bounds(self):
         func(low=numpy.nextafter(fmin, 0), high=numpy.nextafter(fmax, 0))
 
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("high",
                              [dpnp.array([3]), numpy.array([3])],
                              ids=['dpnp.array([3])', 'numpy.array([3])'])
diff --git a/tests/test_sort.py b/tests/test_sort.py
index 205dddafd9c9..aa633c0c3ad9 100644
--- a/tests/test_sort.py
+++ b/tests/test_sort.py
@@ -32,6 +32,7 @@ def test_partition(array, dtype, kth):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("side",
                          ["left", "right"],
                          ids=['"left"', '"right"'])
diff --git a/tests/test_statistics.py b/tests/test_statistics.py
index 7973b008392b..04a765a73bce 100644
--- a/tests/test_statistics.py
+++ b/tests/test_statistics.py
@@ -20,6 +20,7 @@ def test_median(type, size):
     numpy.testing.assert_allclose(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("axis",
                          [0, 1, -1, 2, -2, (1, 2), (0, -2)])
 def test_max(axis):
@@ -32,6 +33,7 @@ def test_max(axis):
     numpy.testing.assert_allclose(dpnp_res, np_res)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array",
                          [[2, 0, 6, 2],
                           [2, 0, 6, 2, 5, 6, 7, 8],
@@ -74,6 +76,7 @@ def test_nanvar(array):
     numpy.testing.assert_array_equal(expected, result)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestBincount:
 
     @pytest.mark.parametrize("array",
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 08f3bbed0ae6..7ec1d6b3f03f 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -190,6 +190,7 @@ def test_strides_copysign(dtype, shape):
     numpy.testing.assert_allclose(result, expected)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("dtype",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=["float64", "float32", "int64", "int32"])
@@ -209,6 +210,7 @@ def test_strides_fmod(dtype, shape):
     numpy.testing.assert_allclose(result, expected)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("dtype",
                          [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
                          ids=["float64", "float32", "int64", "int32"])
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 1d6914a0552f..26a71eef2936 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -367,6 +367,7 @@ def test_rs_uniform(usm_type, seed):
     assert_sycl_queue_equal(res_sycl_queue, sycl_queue)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize(
     "func,data1,data2",
     [
@@ -527,6 +528,7 @@ def test_det(device):
     assert_sycl_queue_equal(result_queue, expected_queue)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("device",
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
@@ -653,6 +655,7 @@ def test_qr(device):
     assert_sycl_queue_equal(dpnp_r_queue, expected_queue)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("device",
                         valid_devices,
                         ids=[device.filter_string for device in valid_devices])
diff --git a/tests/test_umath.py b/tests/test_umath.py
index 52941ba94cb5..6122b253ca37 100644
--- a/tests/test_umath.py
+++ b/tests/test_umath.py
@@ -58,6 +58,7 @@ def get_id(val):
     return val.__str__()
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize('test_cases', test_cases, ids=get_id)
 def test_umaths(test_cases):
     umath, args_str = test_cases
diff --git a/tests/third_party/cupy/core_tests/test_ndarray_conversion.py b/tests/third_party/cupy/core_tests/test_ndarray_conversion.py
index f724bb454a98..2fe8952a56d0 100644
--- a/tests/third_party/cupy/core_tests/test_ndarray_conversion.py
+++ b/tests/third_party/cupy/core_tests/test_ndarray_conversion.py
@@ -12,6 +12,7 @@
     {'shape': (1,)},
     {'shape': (1, 1, 1)},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestNdarrayItem(unittest.TestCase):
 
     @testing.for_all_dtypes()
diff --git a/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py b/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py
index 1a5a87fbae40..b026377e3e2a 100644
--- a/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py
+++ b/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py
@@ -159,12 +159,14 @@ def test_astype_strides_broadcast(self, xp, src_dtype, dst_dtype):
         return numpy.array(
             astype_without_warning(src, dst_dtype, order='K').strides)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_diagonal1(self, xp, dtype):
         a = testing.shaped_arange((3, 4, 5), xp, dtype)
         return a.diagonal(1, 2, 0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_diagonal2(self, xp, dtype):
diff --git a/tests/third_party/cupy/core_tests/test_ndarray_math.py b/tests/third_party/cupy/core_tests/test_ndarray_math.py
index 36bd6979c6ec..816261517c4c 100644
--- a/tests/third_party/cupy/core_tests/test_ndarray_math.py
+++ b/tests/third_party/cupy/core_tests/test_ndarray_math.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 
 import numpy
 
@@ -9,6 +10,7 @@
 @testing.parameterize(*testing.product({
     'decimals': [-2, -1, 0, 1, 2],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestRound(unittest.TestCase):
 
     shape = (20,)
diff --git a/tests/third_party/cupy/core_tests/test_ndarray_reduction.py b/tests/third_party/cupy/core_tests/test_ndarray_reduction.py
index 6f5f466062d0..34275e917cf6 100644
--- a/tests/third_party/cupy/core_tests/test_ndarray_reduction.py
+++ b/tests/third_party/cupy/core_tests/test_ndarray_reduction.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 
 import numpy
 
@@ -7,6 +8,7 @@
 from tests.third_party.cupy import testing
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestArrayReduction(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/creation_tests/test_from_data.py b/tests/third_party/cupy/creation_tests/test_from_data.py
index a2fecb8d6418..e07d927b1cf0 100644
--- a/tests/third_party/cupy/creation_tests/test_from_data.py
+++ b/tests/third_party/cupy/creation_tests/test_from_data.py
@@ -414,6 +414,7 @@ def test_asarray_cuda_array_zero_dim_dtype(self, xp):
         return xp.ascontiguousarray(a, dtype=numpy.int64)
 
     # @testing.for_CF_orders()
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_orders('C')
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/tests/third_party/cupy/creation_tests/test_matrix.py b/tests/third_party/cupy/creation_tests/test_matrix.py
index 5b2fb7cc51a6..a5471f213ebf 100644
--- a/tests/third_party/cupy/creation_tests/test_matrix.py
+++ b/tests/third_party/cupy/creation_tests/test_matrix.py
@@ -60,16 +60,19 @@ def test_diag_construction_from_tuple(self, xp):
         self.assertIsInstance(r, xp.ndarray)
         return r
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_diag_scaler(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.diag(1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_diag_0dim(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.diag(xp.zeros(()))
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_diag_3dim(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -90,14 +93,17 @@ def test_diagflat3(self, xp):
         a = testing.shaped_arange((3, 3), xp)
         return xp.diagflat(a, -2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_diagflat_from_scalar(self, xp):
         return xp.diagflat(3)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_diagflat_from_scalar_with_k0(self, xp):
         return xp.diagflat(3, 0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_diagflat_from_scalar_with_k1(self, xp):
         return xp.diagflat(3, 1)
@@ -142,6 +148,7 @@ def test_tril(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
         return xp.tril(m)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_tril_array_like(self, xp):
         return xp.tril([[1, 2], [3, 4]])
@@ -164,6 +171,7 @@ def test_triu(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
         return xp.triu(m)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_triu_array_like(self, xp):
         return xp.triu([[1, 2], [3, 4]])
diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py
index d765858023c2..75960e492c17 100644
--- a/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -206,11 +206,13 @@ def test_linspace_start_stop_list(self, xp, dtype):
         stop = [100, 16]
         return xp.linspace(start, stop, num=50, dtype=dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_bool=True)
     @testing.numpy_cupy_allclose()
     def test_logspace(self, xp, dtype):
         return xp.logspace(0, 2, 5, dtype=dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_bool=True)
     @testing.numpy_cupy_allclose()
     def test_logspace2(self, xp, dtype):
@@ -221,24 +223,29 @@ def test_logspace2(self, xp, dtype):
     def test_logspace_zero_num(self, xp, dtype):
         return xp.logspace(0, 2, 0, dtype=dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_bool=True)
     @testing.numpy_cupy_allclose()
     def test_logspace_one_num(self, xp, dtype):
         return xp.logspace(0, 2, 1, dtype=dtype)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_bool=True)
     @testing.numpy_cupy_allclose()
     def test_logspace_no_endpoint(self, xp, dtype):
         return xp.logspace(0, 2, 5, dtype=dtype, endpoint=False)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_allclose()
     def test_logspace_no_dtype_int(self, xp):
         return xp.logspace(0, 2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_allclose()
     def test_logspace_no_dtype_float(self, xp):
         return xp.logspace(0.0, 2.0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_allclose()
     def test_logspace_float_args_with_int_dtype(self, xp):
         return xp.logspace(0.1, 2.1, 11, dtype=int)
@@ -248,6 +255,7 @@ def test_logspace_neg_num(self):
             with pytest.raises(ValueError):
                 xp.logspace(0, 10, -1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_bool=True)
     @testing.numpy_cupy_allclose()
     def test_logspace_base(self, xp, dtype):
diff --git a/tests/third_party/cupy/fft_tests/test_fft.py b/tests/third_party/cupy/fft_tests/test_fft.py
index cf23e4a5e902..e56a52f79399 100644
--- a/tests/third_party/cupy/fft_tests/test_fft.py
+++ b/tests/third_party/cupy/fft_tests/test_fft.py
@@ -14,6 +14,7 @@
     'shape': [(0,), (10, 0), (10,), (10, 10)],
     'norm': [None, 'ortho', ''],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestFft(unittest.TestCase):
 
@@ -61,6 +62,7 @@ def test_ifft(self, xp, dtype):
     {'shape': (3, 4), 's': (0, 5), 'axes': None, 'norm': None},
     {'shape': (3, 4), 's': (1, 0), 'axes': None, 'norm': None},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestFft2(unittest.TestCase):
 
@@ -109,6 +111,7 @@ def test_ifft2(self, xp, dtype):
     {'shape': (2, 0, 5), 's': None, 'axes': None, 'norm': None},
     {'shape': (0, 0, 5), 's': None, 'axes': None, 'norm': None},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestFftn(unittest.TestCase):
 
@@ -136,6 +139,7 @@ def test_ifftn(self, xp, dtype):
     'shape': [(10,), (10, 10)],
     'norm': [None, 'ortho'],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRfft(unittest.TestCase):
 
@@ -160,6 +164,7 @@ def test_irfft(self, xp, dtype):
     {'shape': (3, 4), 's': None, 'axes': (), 'norm': None},
     {'shape': (2, 3, 4), 's': None, 'axes': (), 'norm': None},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRfft2EmptyAxes(unittest.TestCase):
 
@@ -182,6 +187,7 @@ def test_irfft2(self, dtype):
     {'shape': (3, 4), 's': None, 'axes': (), 'norm': None},
     {'shape': (2, 3, 4), 's': None, 'axes': (), 'norm': None},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRfftnEmptyAxes(unittest.TestCase):
 
@@ -205,6 +211,7 @@ def test_irfftn(self, dtype):
     'shape': [(10,), (10, 10)],
     'norm': [None, 'ortho'],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestHfft(unittest.TestCase):
 
@@ -230,6 +237,7 @@ def test_ihfft(self, xp, dtype):
     {'n': 10, 'd': 0.5},
     {'n': 100, 'd': 2},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestFftfreq(unittest.TestCase):
 
@@ -257,6 +265,7 @@ def test_rfftfreq(self, xp, dtype):
     {'shape': (10, 10), 'axes': 0},
     {'shape': (10, 10), 'axes': (0, 1)},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestFftshift(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/indexing_tests/test_generate.py b/tests/third_party/cupy/indexing_tests/test_generate.py
index d9be22ed28aa..d10e503bcec8 100644
--- a/tests/third_party/cupy/indexing_tests/test_generate.py
+++ b/tests/third_party/cupy/indexing_tests/test_generate.py
@@ -7,6 +7,7 @@
 from tests.third_party.cupy import testing
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestIndices(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/indexing_tests/test_indexing.py b/tests/third_party/cupy/indexing_tests/test_indexing.py
index e3b6b18162b0..1cdab954bbe8 100644
--- a/tests/third_party/cupy/indexing_tests/test_indexing.py
+++ b/tests/third_party/cupy/indexing_tests/test_indexing.py
@@ -10,16 +10,19 @@
 @testing.gpu
 class TestIndexing(unittest.TestCase):
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_take_by_scalar(self, xp):
         a = testing.shaped_arange((2, 4, 3), xp)
         return a.take(2, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_external_take_by_scalar(self, xp):
         a = testing.shaped_arange((2, 4, 3), xp)
         return xp.take(a, 2, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_take_by_array(self, xp):
         a = testing.shaped_arange((2, 4, 3), xp)
@@ -48,12 +51,14 @@ def test_take_index_range_overflow(self, xp, dtype):
         b = xp.array([0], dtype=dtype)
         return a.take(b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_take_along_axis(self, xp):
         a = testing.shaped_random((2, 4, 3), xp, dtype='float32')
         b = testing.shaped_random((2, 6, 3), xp, dtype='int64', scale=4)
         return xp.take_along_axis(a, b, axis=-2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_take_along_axis_none_axis(self, xp):
         a = testing.shaped_random((2, 4, 3), xp, dtype='float32')
@@ -97,6 +102,7 @@ def test_diagonal(self, xp, dtype):
         a = testing.shaped_arange((3, 4, 5), xp, dtype)
         return a.diagonal(1, 2, 0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_external_diagonal(self, xp, dtype):
@@ -189,6 +195,7 @@ def test_extract_empty_1dim(self, xp):
         return xp.extract(b, a)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestChoose(unittest.TestCase):
 
@@ -339,6 +346,7 @@ def test_select_type_error_condlist(self, dtype):
         with pytest.raises(AttributeError):
             cupy.select(condlist, choicelist)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_bool=True)
     def test_select_type_error_choicelist(self, dtype):
         a, b = list(range(10)), list(range(-10, 0))
diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py
index 722e3ee2a1ff..ed6a156e8848 100644
--- a/tests/third_party/cupy/indexing_tests/test_insert.py
+++ b/tests/third_party/cupy/indexing_tests/test_insert.py
@@ -64,6 +64,7 @@ def test_place_shape_unmatch_error(self, dtype):
     'mode': ['raise', 'wrap', 'clip'],
     'n_vals': [0, 1, 3, 4, 5],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestPut(unittest.TestCase):
 
@@ -84,6 +85,7 @@ def test_put(self, xp, dtype):
 @testing.parameterize(*testing.product({
     'shape': [(7,), (2, 3), (4, 3, 2)],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestPutScalars(unittest.TestCase):
 
@@ -110,6 +112,7 @@ def test_put_values_scalar(self, xp):
 @testing.parameterize(*testing.product({
     'shape': [(7,), (2, 3)],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestPutRaises(unittest.TestCase):
 
@@ -178,6 +181,7 @@ def test_putmask(self, xp, dtype):
         return a
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestPutmask(unittest.TestCase):
 
@@ -232,6 +236,7 @@ def test_putmask_differnt_dtypes_mask(self, xp, dtype):
     'val': [1, 0, (2,), (2, 2)],
     'wrap': [True, False],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestFillDiagonal(unittest.TestCase):
 
@@ -308,6 +313,7 @@ def test_diag_indices_from(self, xp):
 @testing.parameterize(*testing.product({
     'shape': [(3, 5), (3, 3, 4), (5,), (0,), (-1,)],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDiagIndicesFromRaises(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/linalg_tests/test_einsum.py b/tests/third_party/cupy/linalg_tests/test_einsum.py
index f4e2f623e908..26c7639ef0ce 100644
--- a/tests/third_party/cupy/linalg_tests/test_einsum.py
+++ b/tests/third_party/cupy/linalg_tests/test_einsum.py
@@ -13,6 +13,7 @@ def _dec_shape(shape, dec):
     return tuple(1 if s == 1 else max(0, s - dec) for s in shape)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestEinSumError(unittest.TestCase):
 
     def test_irregular_ellipsis1(self):
@@ -194,21 +195,25 @@ def test_invalid_sub1(self):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [None])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invalid_sub2(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [0], [1])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invalid_sub3(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [Ellipsis, 0, Ellipsis])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_dim_mismatch1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [0], xp.arange(3), [0])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_dim_mismatch2(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -219,11 +224,13 @@ def test_dim_mismatch3(self):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(6).reshape(2, 3), [0, 0])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_too_many_dims1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(3, [0])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_too_many_dims2(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -298,6 +305,7 @@ def setUp(self):
 
         self.operands = operands
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_allclose(contiguous_check=False)
     def test_einsum(self, xp):
         # TODO(kataoka): support memory efficient cupy.einsum
diff --git a/tests/third_party/cupy/linalg_tests/test_product.py b/tests/third_party/cupy/linalg_tests/test_product.py
index a4eff836e12c..2a97fa79b7ce 100644
--- a/tests/third_party/cupy/linalg_tests/test_product.py
+++ b/tests/third_party/cupy/linalg_tests/test_product.py
@@ -31,6 +31,7 @@
     'trans_a': [True, False],
     'trans_b': [True, False],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDot(unittest.TestCase):
 
@@ -89,6 +90,7 @@ def test_dot_with_out(self, xp, dtype_a, dtype_b, dtype_c):
         ((2, 4, 5, 2), (2, 4, 5, 2), 0, 0, -1),
     ],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestCrossProduct(unittest.TestCase):
 
@@ -224,6 +226,7 @@ def test_transposed_multidim_vdot(self, xp, dtype):
             (2, 2, 2, 3), xp, dtype).transpose(1, 3, 0, 2)
         return xp.vdot(a, b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_inner(self, xp, dtype):
@@ -231,6 +234,7 @@ def test_inner(self, xp, dtype):
         b = testing.shaped_reverse_arange((5,), xp, dtype)
         return xp.inner(a, b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_reversed_inner(self, xp, dtype):
@@ -238,6 +242,7 @@ def test_reversed_inner(self, xp, dtype):
         b = testing.shaped_reverse_arange((5,), xp, dtype)[::-1]
         return xp.inner(a, b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_multidim_inner(self, xp, dtype):
@@ -273,6 +278,7 @@ def test_multidim_outer(self, xp, dtype):
         b = testing.shaped_arange((4, 5), xp, dtype)
         return xp.outer(a, b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_tensordot(self, xp, dtype):
@@ -287,6 +293,7 @@ def test_transposed_tensordot(self, xp, dtype):
         b = testing.shaped_arange((4, 3, 2), xp, dtype).transpose(2, 0, 1)
         return xp.tensordot(a, b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_tensordot_with_int_axes(self, xp, dtype):
@@ -316,6 +323,7 @@ def test_transposed_tensordot_with_int_axes(self, xp, dtype):
                 (5, 4, 3, 2), xp, dtype).transpose(3, 0, 2, 1)
             return xp.tensordot(a, b, axes=3)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_tensordot_with_list_axes(self, xp, dtype):
@@ -392,6 +400,7 @@ def test_zerodim_kron(self, xp, dtype):
         ((0, 0, 0), ([0, 2, 1], [1, 2, 0])),
     ],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestProductZeroLength(unittest.TestCase):
 
@@ -404,6 +413,7 @@ def test_tensordot_zero_length(self, xp, dtype):
 
 
 class TestMatrixPower(unittest.TestCase):
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_matrix_power_0(self, xp, dtype):
@@ -428,6 +438,7 @@ def test_matrix_power_3(self, xp, dtype):
         a = testing.shaped_arange((3, 3), xp, dtype)
         return xp.linalg.matrix_power(a, 3)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-5)
     def test_matrix_power_inv1(self, xp, dtype):
@@ -435,6 +446,7 @@ def test_matrix_power_inv1(self, xp, dtype):
         a = a * a % 30
         return xp.linalg.matrix_power(a, -1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-5)
     def test_matrix_power_inv2(self, xp, dtype):
@@ -442,6 +454,7 @@ def test_matrix_power_inv2(self, xp, dtype):
         a = a * a % 30
         return xp.linalg.matrix_power(a, -2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4)
     def test_matrix_power_inv3(self, xp, dtype):
diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py
index 32ebc76c9b61..0be9eaeee610 100644
--- a/tests/third_party/cupy/logic_tests/test_comparison.py
+++ b/tests/third_party/cupy/logic_tests/test_comparison.py
@@ -1,5 +1,6 @@
 import operator
 import unittest
+import pytest
 
 import numpy
 
@@ -7,6 +8,7 @@
 from tests.third_party.cupy import testing
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestComparison(unittest.TestCase):
 
@@ -36,6 +38,7 @@ def test_equal(self):
         self.check_binary('equal')
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestComparisonOperator(unittest.TestCase):
 
@@ -160,6 +163,7 @@ def test_allclose_array_scalar(self, xp, dtype):
         return xp.allclose(a, b)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestIsclose(unittest.TestCase):
 
     @testing.for_all_dtypes(no_complex=True)
diff --git a/tests/third_party/cupy/logic_tests/test_content.py b/tests/third_party/cupy/logic_tests/test_content.py
index 32d4f95d1ef6..7172d9b12b2e 100644
--- a/tests/third_party/cupy/logic_tests/test_content.py
+++ b/tests/third_party/cupy/logic_tests/test_content.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 
 import numpy
 
@@ -23,11 +24,14 @@ def check_unary_nan(self, name, xp, dtype):
             dtype=dtype)
         return getattr(xp, name)(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_isfinite(self):
         self.check_unary_inf('isfinite')
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_isinf(self):
         self.check_unary_inf('isinf')
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_isnan(self):
         self.check_unary_nan('isnan')
diff --git a/tests/third_party/cupy/logic_tests/test_ops.py b/tests/third_party/cupy/logic_tests/test_ops.py
index 2948602b8a51..55b8617882b1 100644
--- a/tests/third_party/cupy/logic_tests/test_ops.py
+++ b/tests/third_party/cupy/logic_tests/test_ops.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 
 from tests.third_party.cupy import testing
 
@@ -19,14 +20,18 @@ def check_binary(self, name, xp, dtype):
         b = testing.shaped_reverse_arange((2, 3), xp, dtype)
         return getattr(xp, name)(a, b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_and(self):
         self.check_binary('logical_and')
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_or(self):
         self.check_binary('logical_or')
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_xor(self):
         self.check_binary('logical_xor')
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_not(self):
         self.check_unary('logical_not')
diff --git a/tests/third_party/cupy/manipulation_tests/test_basic.py b/tests/third_party/cupy/manipulation_tests/test_basic.py
index 936e94c7e19f..8cc170d3241a 100644
--- a/tests/third_party/cupy/manipulation_tests/test_basic.py
+++ b/tests/third_party/cupy/manipulation_tests/test_basic.py
@@ -1,5 +1,6 @@
 import itertools
 import unittest
+import pytest
 
 import numpy
 
@@ -27,6 +28,7 @@ def test_copyto_dtype(self, xp, dtype):
         xp.copyto(b, a)
         return b
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_copyto_broadcast(self, xp, dtype):
@@ -35,6 +37,7 @@ def test_copyto_broadcast(self, xp, dtype):
         xp.copyto(b, a)
         return b
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_copyto_where(self, xp, dtype):
@@ -107,6 +110,7 @@ def test_copyto_where(self, xp, dtype):
     *testing.product(
         {'src': [float(3.2), int(0), int(4), int(-4), True, False, 1 + 1j],
          'dst_shape': [(), (0,), (1,), (1, 1), (2, 2)]}))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestCopytoFromScalar(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/manipulation_tests/test_dims.py b/tests/third_party/cupy/manipulation_tests/test_dims.py
index c6ef32b21484..2c3e73b596da 100644
--- a/tests/third_party/cupy/manipulation_tests/test_dims.py
+++ b/tests/third_party/cupy/manipulation_tests/test_dims.py
@@ -19,15 +19,18 @@ def check_atleast(self, func, xp):
         f = numpy.float32(1)
         return func(a, b, c, d, e, f)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_atleast_1d1(self, xp):
         return self.check_atleast(xp.atleast_1d, xp)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_atleast_1d2(self, xp):
         a = testing.shaped_arange((1, 3, 2), xp)
         return xp.atleast_1d(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_atleast_2d1(self, xp):
         return self.check_atleast(xp.atleast_2d, xp)
@@ -37,6 +40,7 @@ def test_atleast_2d2(self, xp):
         a = testing.shaped_arange((1, 3, 2), xp)
         return xp.atleast_2d(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_atleast_3d1(self, xp):
         return self.check_atleast(xp.atleast_3d, xp)
diff --git a/tests/third_party/cupy/manipulation_tests/test_tiling.py b/tests/third_party/cupy/manipulation_tests/test_tiling.py
index b6d0d180bf2e..fa00273671d5 100644
--- a/tests/third_party/cupy/manipulation_tests/test_tiling.py
+++ b/tests/third_party/cupy/manipulation_tests/test_tiling.py
@@ -16,6 +16,7 @@
     {'repeats': [1, 2, 3], 'axis': 1},
     {'repeats': [1, 2, 3], 'axis': -2},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRepeat(unittest.TestCase):
 
@@ -44,6 +45,7 @@ def test_method(self):
     {'repeats': [2], 'axis': None},
     {'repeats': [2], 'axis': 1},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRepeatListBroadcast(unittest.TestCase):
 
@@ -65,6 +67,7 @@ def test_array_repeat(self, xp):
     {'repeats': [1, 2, 3, 4], 'axis': None},
     {'repeats': [1, 2, 3, 4], 'axis': 0},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRepeat1D(unittest.TestCase):
 
@@ -97,6 +100,7 @@ def test_array_repeat(self, xp):
     {'repeats': 2, 'axis': -4},
     {'repeats': 2, 'axis': 3},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRepeatFailure(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/manipulation_tests/test_transpose.py b/tests/third_party/cupy/manipulation_tests/test_transpose.py
index d12b268c2f4e..ed06f050f777 100644
--- a/tests/third_party/cupy/manipulation_tests/test_transpose.py
+++ b/tests/third_party/cupy/manipulation_tests/test_transpose.py
@@ -41,12 +41,14 @@ def test_moveaxis6(self, xp):
         return xp.moveaxis(a, [0, 2, 1], [3, 4, 0])
 
     # dim is too large
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid1_1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
             with pytest.raises(numpy.AxisError):
                 xp.moveaxis(a, [0, 1], [1, 3])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid1_2(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -54,12 +56,14 @@ def test_moveaxis_invalid1_2(self):
                 xp.moveaxis(a, [0, 1], [1, 3])
 
     # dim is too small
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid2_1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
             with pytest.raises(numpy.AxisError):
                 xp.moveaxis(a, [0, -4], [1, 2])
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid2_2(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -67,6 +71,7 @@ def test_moveaxis_invalid2_2(self):
                 xp.moveaxis(a, [0, -4], [1, 2])
 
     # len(source) != len(destination)
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid3(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -74,6 +79,7 @@ def test_moveaxis_invalid3(self):
                 xp.moveaxis(a, [0, 1, 2], [1, 2])
 
     # len(source) != len(destination)
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid4(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -81,6 +87,7 @@ def test_moveaxis_invalid4(self):
                 xp.moveaxis(a, [0, 1], [1, 2, 0])
 
     # Use the same axis twice
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid5_1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -104,6 +111,7 @@ def test_rollaxis(self, xp):
         a = testing.shaped_arange((2, 3, 4), xp)
         return xp.rollaxis(a, 2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_rollaxis_failure(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -115,6 +123,7 @@ def test_swapaxes(self, xp):
         a = testing.shaped_arange((2, 3, 4), xp)
         return xp.swapaxes(a, 2, 0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_swapaxes_failure(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py
index 9fbbe0e7deeb..28771b4979b5 100644
--- a/tests/third_party/cupy/math_tests/test_arithmetic.py
+++ b/tests/third_party/cupy/math_tests/test_arithmetic.py
@@ -57,6 +57,7 @@ def test_raises_with_numpy_input(self):
         'name': ['reciprocal'],
     })
 ))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestArithmeticUnary(unittest.TestCase):
 
     @testing.numpy_cupy_allclose(atol=1e-5)
@@ -204,6 +205,7 @@ def check_binary(self, xp):
         'name': ['divide', 'true_divide', 'subtract'],
     })
 ))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestArithmeticBinary(ArithmeticBinaryBase, unittest.TestCase):
 
     def test_binary(self):
@@ -253,6 +255,7 @@ def test_binary(self):
         'use_dtype': [True, False],
     })
 ))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestArithmeticBinary2(ArithmeticBinaryBase, unittest.TestCase):
 
     def test_binary(self):
@@ -279,6 +282,7 @@ def test_modf(self, xp, dtype):
     'xp': [numpy, cupy],
     'shape': [(3, 2), (), (3, 0, 2)]
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestBoolSubtract(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/math_tests/test_matmul.py b/tests/third_party/cupy/math_tests/test_matmul.py
index 3d9d258fd9d8..f8533eef8c1c 100644
--- a/tests/third_party/cupy/math_tests/test_matmul.py
+++ b/tests/third_party/cupy/math_tests/test_matmul.py
@@ -54,6 +54,7 @@
             ((1, 3, 3), (10, 1, 3, 1)),
         ],
     }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestMatmul(unittest.TestCase):
 
@@ -88,6 +89,7 @@ def test_cupy_matmul(self, xp, dtype1):
             ((6, 5, 3, 2), (2,)),
         ],
     }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestMatmulLarge(unittest.TestCase):
 
@@ -138,6 +140,7 @@ def test_cupy_matmul(self, xp, dtype1):
             ((0, 1, 1), (2, 1, 1)),
         ],
     }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestMatmulInvalidShape(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/math_tests/test_rounding.py b/tests/third_party/cupy/math_tests/test_rounding.py
index 034e36c9f14b..91d74923f063 100644
--- a/tests/third_party/cupy/math_tests/test_rounding.py
+++ b/tests/third_party/cupy/math_tests/test_rounding.py
@@ -77,6 +77,7 @@ def test_round_(self):
 @testing.parameterize(*testing.product({
     'decimals': [-2, -1, 0, 1, 2],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestRound(unittest.TestCase):
 
     shape = (20,)
@@ -136,6 +137,7 @@ def test_round_small(self, xp, dtype):
         (1.6, 0),
     ]
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestRoundBorder(unittest.TestCase):
 
     @testing.numpy_cupy_allclose(atol=1e-5)
diff --git a/tests/third_party/cupy/math_tests/test_sumprod.py b/tests/third_party/cupy/math_tests/test_sumprod.py
index 15066f03872e..d9fe3b22b265 100644
--- a/tests/third_party/cupy/math_tests/test_sumprod.py
+++ b/tests/third_party/cupy/math_tests/test_sumprod.py
@@ -227,6 +227,7 @@ def test_nansum_axis_transposed(self, xp, dtype):
         'shape': [(2, 3, 4), (20, 30, 40)],
     })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestNansumNanprodExtra(unittest.TestCase):
 
@@ -254,6 +255,7 @@ def test_nansum_out_wrong_shape(self):
         'axis': [(1, 3), (0, 2, 3)],
     })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestNansumNanprodAxes(unittest.TestCase):
     @testing.for_all_dtypes(no_bool=True, no_float16=True)
@@ -269,6 +271,7 @@ def test_nansum_axes(self, xp, dtype):
 
 
 @testing.parameterize(*testing.product({'axis': axes}))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestCumsum(unittest.TestCase):
 
@@ -387,6 +390,7 @@ def test_cumprod_1dim(self, xp, dtype):
         a = testing.shaped_arange((5,), xp, dtype)
         return xp.cumprod(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_cumprod_out(self, xp, dtype):
@@ -409,6 +413,7 @@ def test_cumprod_2dim_without_axis(self, xp, dtype):
         a = testing.shaped_arange((4, 5), xp, dtype)
         return xp.cumprod(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_cumprod_2dim_with_axis(self, xp, dtype):
@@ -434,6 +439,7 @@ def test_cumprod_huge_array(self):
         del result
         cupy.get_default_memory_pool().free_all_blocks()
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     def test_invalid_axis_lower1(self, dtype):
         for xp in (numpy, cupy):
@@ -441,6 +447,7 @@ def test_invalid_axis_lower1(self, dtype):
             with pytest.raises(numpy.AxisError):
                 xp.cumprod(a, axis=-a.ndim - 1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     def test_invalid_axis_lower2(self, dtype):
         for xp in (numpy, cupy):
@@ -448,6 +455,7 @@ def test_invalid_axis_lower2(self, dtype):
             with pytest.raises(numpy.AxisError):
                 xp.cumprod(a, axis=-a.ndim - 1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     def test_invalid_axis_upper1(self, dtype):
         for xp in (numpy, cupy):
@@ -455,6 +463,7 @@ def test_invalid_axis_upper1(self, dtype):
             with pytest.raises(numpy.AxisError):
                 return xp.cumprod(a, axis=a.ndim)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     def test_invalid_axis_upper2(self, dtype):
         a = testing.shaped_arange((4, 5), cupy, dtype)
@@ -521,6 +530,7 @@ def test_diff_2dim_with_append(self, xp, dtype):
         b = testing.shaped_arange((1, 5), xp, dtype)
         return xp.diff(a, axis=0, append=b, n=2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.with_requires('numpy>=1.16')
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
@@ -528,6 +538,7 @@ def test_diff_2dim_with_scalar_append(self, xp, dtype):
         a = testing.shaped_arange((4, 5), xp, dtype)
         return xp.diff(a, prepend=1, append=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.with_requires('numpy>=1.16')
     def test_diff_invalid_axis(self):
         for xp in (numpy, cupy):
diff --git a/tests/third_party/cupy/random_tests/test_distributions.py b/tests/third_party/cupy/random_tests/test_distributions.py
index 1504cad45c60..e53d313d91f0 100644
--- a/tests/third_party/cupy/random_tests/test_distributions.py
+++ b/tests/third_party/cupy/random_tests/test_distributions.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 
 import numpy
 
@@ -31,6 +32,7 @@ def check_distribution(self, dist_name, params):
     'b_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsBeta(RandomDistributionsTestCase):
 
@@ -68,6 +70,7 @@ def test_binomial(self, n_dtype, p_dtype):
     'df_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsChisquare(unittest.TestCase):
 
@@ -88,6 +91,7 @@ def test_chisquare(self, df_dtype):
     'alpha_shape': [(3,)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsDirichlet(RandomDistributionsTestCase):
 
@@ -103,6 +107,7 @@ def test_dirichlet(self, alpha_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsExponential(RandomDistributionsTestCase):
 
@@ -112,6 +117,7 @@ def test_exponential(self, scale_dtype):
         self.check_distribution('exponential', {'scale': scale})
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsExponentialError(RandomDistributionsTestCase):
 
@@ -127,6 +133,7 @@ def test_negative_scale(self):
     'dfden_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsF(unittest.TestCase):
 
@@ -151,6 +158,7 @@ def test_f(self, dfnum_dtype, dfden_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsGamma(unittest.TestCase):
 
@@ -194,6 +202,7 @@ def test_geometric(self, p_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsGumbel(RandomDistributionsTestCase):
 
@@ -239,6 +248,7 @@ def test_hypergeometric(self, ngood_dtype, nbad_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsuLaplace(RandomDistributionsTestCase):
 
@@ -256,6 +266,7 @@ def test_laplace(self, loc_dtype, scale_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsLogistic(RandomDistributionsTestCase):
 
@@ -342,6 +353,7 @@ def test_normal(self, mean_dtype, cov_dtype):
     'p_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsNegativeBinomial(RandomDistributionsTestCase):
 
@@ -368,6 +380,7 @@ def test_negative_binomial_for_noninteger_n(self, n_dtype, p_dtype):
     'nonc_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsNoncentralChisquare(RandomDistributionsTestCase):
 
@@ -401,6 +414,7 @@ def test_noncentral_chisquare_for_invalid_params(self, param_dtype):
     'nonc_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsNoncentralF(RandomDistributionsTestCase):
 
@@ -444,6 +458,7 @@ def test_noncentral_f_for_invalid_params(self, param_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsNormal(RandomDistributionsTestCase):
 
@@ -460,6 +475,7 @@ def test_normal(self, loc_dtype, scale_dtype):
     'a_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsPareto(unittest.TestCase):
 
@@ -523,6 +539,7 @@ def test_power_for_negative_a(self, a_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsRayleigh(RandomDistributionsTestCase):
 
@@ -571,6 +588,7 @@ def test_standard_exponential(self):
     'shape_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsStandardGamma(RandomDistributionsTestCase):
 
@@ -597,6 +615,7 @@ def test_standard_normal(self):
     'df_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsStandardT(unittest.TestCase):
 
@@ -619,6 +638,7 @@ def test_standard_t(self, df_dtype):
     'right_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsTriangular(RandomDistributionsTestCase):
 
@@ -662,6 +682,7 @@ def test_triangular_for_invalid_params(self, param_dtype):
     'high_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsUniform(RandomDistributionsTestCase):
 
@@ -679,6 +700,7 @@ def test_uniform(self, low_dtype, high_dtype):
     'kappa_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsVonmises(unittest.TestCase):
 
@@ -703,6 +725,7 @@ def test_vonmises(self, mu_dtype, kappa_dtype):
     'scale_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsWald(RandomDistributionsTestCase):
 
@@ -720,6 +743,7 @@ def test_wald(self, mean_dtype, scale_dtype):
     'a_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsWeibull(RandomDistributionsTestCase):
 
@@ -746,6 +770,7 @@ def test_weibull_for_negative_a(self, a_dtype):
     'a_shape': [(), (3, 2)],
 })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDistributionsZipf(RandomDistributionsTestCase):
 
diff --git a/tests/third_party/cupy/random_tests/test_sample.py b/tests/third_party/cupy/random_tests/test_sample.py
index 19e3fd54ba26..3f8a0169ac12 100644
--- a/tests/third_party/cupy/random_tests/test_sample.py
+++ b/tests/third_party/cupy/random_tests/test_sample.py
@@ -1,5 +1,6 @@
 import unittest
 from unittest import mock
+import pytest
 
 import numpy
 
@@ -32,6 +33,7 @@ def test_lo_hi_nonrandom(self):
         a = random.randint(-1.1, -0.9, size=(2, 2))
         numpy.testing.assert_array_equal(a, cupy.full((2, 2), -1))
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_zero_sizes(self):
         a = random.randint(10, size=(0,))
         numpy.testing.assert_array_equal(a, cupy.array(()))
@@ -44,6 +46,7 @@ def test_zero_sizes(self):
 @testing.gpu
 class TestRandint2(unittest.TestCase):
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @condition.repeat(3, 10)
     def test_bound_1(self):
         vals = [random.randint(0, 10, (2, 3)) for _ in range(10)]
@@ -52,6 +55,7 @@ def test_bound_1(self):
         self.assertEqual(min(_.min() for _ in vals), 0)
         self.assertEqual(max(_.max() for _ in vals), 9)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @condition.repeat(3, 10)
     def test_bound_2(self):
         vals = [random.randint(0, 2) for _ in range(20)]
@@ -60,6 +64,7 @@ def test_bound_2(self):
         self.assertEqual(min(_.min() for _ in vals), 0)
         self.assertEqual(max(_.max() for _ in vals), 1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @condition.repeat(3, 10)
     def test_bound_overflow(self):
         # 100 - (-100) exceeds the range of int8
@@ -68,6 +73,7 @@ def test_bound_overflow(self):
         self.assertGreaterEqual(val.min(), -100)
         self.assertLess(val.max(), 100)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @condition.repeat(3, 10)
     def test_bound_float1(self):
         # generate floats s.t. int(low) < int(high)
@@ -80,6 +86,7 @@ def test_bound_float1(self):
         self.assertEqual(min(_.min() for _ in vals), int(low))
         self.assertEqual(max(_.max() for _ in vals), int(high) - 1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bound_float2(self):
         vals = [random.randint(-1.0, 1.0, (2, 3)) for _ in range(10)]
         for val in vals:
@@ -105,6 +112,7 @@ def test_goodness_of_fit_2(self):
         self.assertTrue(hypothesis.chi_square_test(counts, expected))
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRandintDtype(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py
index d9e80f3e87d6..838f559ed8cf 100644
--- a/tests/third_party/cupy/sorting_tests/test_search.py
+++ b/tests/third_party/cupy/sorting_tests/test_search.py
@@ -29,30 +29,35 @@ def test_argmax_nan(self, xp, dtype):
         a = xp.array([float('nan'), -1, 1], dtype)
         return a.argmax()
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmax_axis_large(self, xp, dtype):
         a = testing.shaped_random((3, 1000), xp, dtype)
         return a.argmax(axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_external_argmax_axis_large(self, xp, dtype):
         a = testing.shaped_random((3, 1000), xp, dtype)
         return xp.argmax(a, axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmax_axis0(self, xp, dtype):
         a = testing.shaped_random((2, 3, 4), xp, dtype)
         return a.argmax(axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmax_axis1(self, xp, dtype):
         a = testing.shaped_random((2, 3, 4), xp, dtype)
         return a.argmax(axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmax_axis2(self, xp, dtype):
@@ -72,6 +77,7 @@ def test_argmax_zero_size(self, dtype):
             with pytest.raises(ValueError):
                 a.argmax()
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     def test_argmax_zero_size_axis0(self, dtype):
         for xp in (numpy, cupy):
@@ -79,6 +85,7 @@ def test_argmax_zero_size_axis0(self, dtype):
             with pytest.raises(ValueError):
                 a.argmax(axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmax_zero_size_axis1(self, xp, dtype):
@@ -103,30 +110,35 @@ def test_external_argmin_all(self, xp, dtype):
         a = testing.shaped_random((2, 3), xp, dtype)
         return xp.argmin(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmin_axis_large(self, xp, dtype):
         a = testing.shaped_random((3, 1000), xp, dtype)
         return a.argmin(axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_external_argmin_axis_large(self, xp, dtype):
         a = testing.shaped_random((3, 1000), xp, dtype)
         return xp.argmin(a, axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmin_axis0(self, xp, dtype):
         a = testing.shaped_random((2, 3, 4), xp, dtype)
         return a.argmin(axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmin_axis1(self, xp, dtype):
         a = testing.shaped_random((2, 3, 4), xp, dtype)
         return a.argmin(axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmin_axis2(self, xp, dtype):
@@ -146,6 +158,7 @@ def test_argmin_zero_size(self, dtype):
             with pytest.raises(ValueError):
                 return a.argmin()
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     def test_argmin_zero_size_axis0(self, dtype):
         for xp in (numpy, cupy):
@@ -153,6 +166,7 @@ def test_argmin_zero_size_axis0(self, dtype):
             with pytest.raises(ValueError):
                 a.argmin(axis=0)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_argmin_zero_size_axis1(self, xp, dtype):
@@ -571,6 +585,7 @@ def test_nanargmax_zero_size_axis1(self, xp, dtype):
         'side': ['left', 'right'],
         'shape': [(), (10,), (6, 3, 3)]})
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestSearchSorted(unittest.TestCase):
 
     @testing.for_all_dtypes(no_bool=True)
@@ -586,6 +601,7 @@ def test_searchsorted(self, xp, dtype):
 @testing.parameterize(
     {'side': 'left'},
     {'side': 'right'})
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestSearchSortedNanInf(unittest.TestCase):
 
     @testing.numpy_cupy_array_equal()
@@ -648,6 +664,7 @@ def test_searchsorted_minf(self, xp):
         return y,
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestSearchSortedInvalid(unittest.TestCase):
 
@@ -662,6 +679,7 @@ def test_searchsorted_ndbins(self):
                 xp.searchsorted(bins, x)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestSearchSortedWithSorter(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/sorting_tests/test_sort.py b/tests/third_party/cupy/sorting_tests/test_sort.py
index 966ccbf309b6..f26d13cf537b 100644
--- a/tests/third_party/cupy/sorting_tests/test_sort.py
+++ b/tests/third_party/cupy/sorting_tests/test_sort.py
@@ -34,12 +34,14 @@ def test_external_sort_zero_dim(self):
             with pytest.raises(numpy.AxisError):
                 xp.sort(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_sort_two_or_more_dim(self, xp):
         a = testing.shaped_random((2, 3, 3), xp)
         a.sort()
         return a
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_external_sort_two_or_more_dim(self, xp):
         a = testing.shaped_random((2, 3, 3), xp)
@@ -103,6 +105,7 @@ def test_sort_axis3(self, xp):
         a.sort(axis=2)
         return a
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_external_sort_axis(self, xp):
         a = testing.shaped_random((2, 3, 3), xp)
@@ -114,11 +117,13 @@ def test_sort_negative_axis(self, xp):
         a.sort(axis=-2)
         return a
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_external_sort_negative_axis(self, xp):
         a = testing.shaped_random((2, 3, 3), xp)
         return xp.sort(a, axis=-2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_external_sort_none_axis(self, xp):
         a = testing.shaped_random((2, 3, 3), xp)
@@ -135,12 +140,14 @@ def test_sort_invalid_axis2(self):
         with self.assertRaises(numpy.AxisError):
             a.sort(axis=3)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_external_sort_invalid_axis1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_random((2, 3, 3), xp)
             with pytest.raises(numpy.AxisError):
                 xp.sort(a, axis=3)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_external_sort_invalid_axis2(self):
         a = testing.shaped_random((2, 3, 3), cupy)
         with self.assertRaises(numpy.AxisError):
@@ -157,12 +164,14 @@ def test_sort_invalid_negative_axis2(self):
         with self.assertRaises(numpy.AxisError):
             a.sort(axis=-4)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_external_sort_invalid_negative_axis1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_random((2, 3, 3), xp)
             with pytest.raises(numpy.AxisError):
                 xp.sort(a, axis=-4)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_external_sort_invalid_negative_axis2(self):
         a = testing.shaped_random((2, 3, 3), cupy)
         with self.assertRaises(numpy.AxisError):
@@ -459,6 +468,7 @@ def test_sort_complex_nan(self, xp, dtype):
     'external': [False, True],
     'length': [10, 20000],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestPartition(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py
index fd5ec429a54d..3c68a998b5ad 100644
--- a/tests/third_party/cupy/statistics_tests/test_correlation.py
+++ b/tests/third_party/cupy/statistics_tests/test_correlation.py
@@ -70,6 +70,7 @@ def check_raises(self, a_shape, y_shape=None, rowvar=True, bias=False,
             with pytest.raises(ValueError):
                 xp.cov(a, y, rowvar, bias, ddof)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_cov(self):
         self.check((2, 3))
         self.check((2,), (2,))
@@ -78,10 +79,12 @@ def test_cov(self):
         self.check((2, 3), bias=True)
         self.check((2, 3), ddof=2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_cov_warns(self):
         self.check_warns((2, 3), ddof=3)
         self.check_warns((2, 3), ddof=4)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_cov_raises(self):
         self.check_raises((2, 3), ddof=1.2)
         self.check_raises((3, 4, 2))
@@ -97,6 +100,7 @@ def test_cov_empty(self):
     'shape1': [(5,), (6,), (20,), (21,)],
     'shape2': [(5,), (6,), (20,), (21,)],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestCorrelateShapeCombination(unittest.TestCase):
 
     @testing.for_all_dtypes(no_float16=True)
@@ -111,6 +115,7 @@ def test_correlate(self, xp, dtype):
 @testing.parameterize(*testing.product({
     'mode': ['valid', 'full', 'same']
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestCorrelate(unittest.TestCase):
 
     @testing.for_all_dtypes()
@@ -139,6 +144,7 @@ def test_correlate_diff_types(self, xp, dtype1, dtype2):
 @testing.parameterize(*testing.product({
     'mode': ['valid', 'same', 'full']
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestCorrelateInvalid(unittest.TestCase):
 
     @testing.with_requires('numpy>=1.18')
diff --git a/tests/third_party/cupy/statistics_tests/test_histogram.py b/tests/third_party/cupy/statistics_tests/test_histogram.py
index 0560dc00256e..2065a826986d 100644
--- a/tests/third_party/cupy/statistics_tests/test_histogram.py
+++ b/tests/third_party/cupy/statistics_tests/test_histogram.py
@@ -138,6 +138,7 @@ def test_histogram_float_weights_dtype(self, xp, dtype):
         assert xp.issubdtype(h.dtype, xp.floating)
         return h
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_histogram_weights_basic(self):
         v = cupy.random.rand(100)
         w = cupy.ones(100) * 5
diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py
index ce6953812bbe..aea22d02c511 100644
--- a/tests/third_party/cupy/statistics_tests/test_meanvar.py
+++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py
@@ -19,36 +19,42 @@ def test_median_noaxis(self, xp, dtype):
         a = testing.shaped_random((3, 4, 5), xp, dtype)
         return xp.median(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_median_axis1(self, xp, dtype):
         a = testing.shaped_random((3, 4, 5), xp, dtype)
         return xp.median(a, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_median_axis2(self, xp, dtype):
         a = testing.shaped_random((3, 4, 5), xp, dtype)
         return xp.median(a, axis=2)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_median_overwrite_input(self, xp, dtype):
         a = testing.shaped_random((3, 4, 5), xp, dtype)
         return xp.median(a, overwrite_input=True)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_median_keepdims_axis1(self, xp, dtype):
         a = testing.shaped_random((3, 4, 5), xp, dtype)
         return xp.median(a, axis=1, keepdims=True)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_median_keepdims_noaxis(self, xp, dtype):
         a = testing.shaped_random((3, 4, 5), xp, dtype)
         return xp.median(a, keepdims=True)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_median_invalid_axis(self):
         for xp in [numpy, cupy]:
             a = testing.shaped_random((3, 4, 5), xp)
@@ -72,6 +78,7 @@ def test_median_invalid_axis(self):
         'keepdims': [True, False]
     })
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestMedianAxis(unittest.TestCase):
 
@@ -82,6 +89,7 @@ def test_median_axis_sequence(self, xp, dtype):
         return xp.median(a, self.axis, keepdims=self.keepdims)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestAverage(unittest.TestCase):
 
@@ -148,6 +156,7 @@ def test_external_mean_all(self, xp, dtype):
         a = testing.shaped_arange((2, 3), xp, dtype)
         return xp.mean(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_mean_axis(self, xp, dtype):
@@ -160,18 +169,21 @@ def test_external_mean_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return xp.mean(a, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_mean_all_float64_dtype(self, xp, dtype):
         a = xp.full((2, 3, 4), 123456789, dtype=dtype)
         return xp.mean(a, dtype=numpy.float64)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_allclose()
     def test_mean_all_int64_dtype(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return xp.mean(a, dtype=numpy.int64)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_mean_all_complex_dtype(self, xp, dtype):
@@ -202,24 +214,28 @@ def test_external_var_all_ddof(self, xp, dtype):
         a = testing.shaped_arange((2, 3), xp, dtype)
         return xp.var(a, ddof=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_var_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return a.var(axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_external_var_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return xp.var(a, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_var_axis_ddof(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return a.var(axis=1, ddof=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_external_var_axis_ddof(self, xp, dtype):
@@ -250,24 +266,28 @@ def test_external_std_all_ddof(self, xp, dtype):
         a = testing.shaped_arange((2, 3), xp, dtype)
         return xp.std(a, ddof=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_std_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return a.std(axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_external_std_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return xp.std(a, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_std_axis_ddof(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return a.std(axis=1, ddof=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_external_std_axis_ddof(self, xp, dtype):
@@ -454,6 +474,7 @@ def test_nanstd_float16(self, xp):
     ],
     'func': ['mean', 'std', 'var'],
 }))
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestProductZeroLength(unittest.TestCase):
 

From dd47c08c601e2ed95b1b59c0f6715f4b07dea5ff Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Thu, 23 Feb 2023 18:01:26 +0100
Subject: [PATCH 3/7] Merge master into gold/2021 (#1315)

* Complete support of python 3.10 in external CI (#1269)

* Set minimum required versions & fix debug building (#1270)

* Set minimum required versions & fix debug building

* Fix typo

* Add support of NumPy 1.24 (#1276)

* Set minimum required versions & fix debug building

* Add support of numpy 1.24

* Get rid of 'has_aspect_host' property in tests (#1274)

* Set minimum required versions & fix debug building

* Get rid of 'has_aspect_host' property in tests

* Update tests/test_sycl_queue.py

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>

* Add support of dpnp.less_equal() (#1275)

* Set minimum required versions & fix debug building

* Add support of dpnp.less_equal()

* Test no broadcast together with input shapes

* Add support of comparison operations (#1278)

* Use eye() function from dpctl.tensor. (#1271)

* Use eye() function from dpctl.tensor.

* Add missed order in test for eye() function.

* Updated copyright year. Added parameter like for eye() function.

* Removed input argument additional kwargs for eye() function.

* Get rid of unsupported types in array creation tests (#1283)

* Add support of logical comparison operations (#1280)

* Add device and sycl_queue keyword arguments to random calls (#1277)

* Set minimum required versions & fix debug building

* Add device and sycl_queue keyword arguments to random calls

* Add device and sycl_queue to dpnp.random.seed() & use random values if seed is None

* Update dpnp/random/dpnp_iface_random.py

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>

---------

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>

* add __repr__

* add __str__

* reviewer's comments

* Fixed gh-1272 (#1287)

* Support high=None in dpnp.randint() (#1284)

* linter changes applied

* Add operation __index__ and __complex__ (#1285)

* Add operation __index__ and __complex__

* Add tests

* Update tests with proper call of dpctl.SyclQueue() (#1290)

* Update minimum required versions of dependent components. (#1289)

* dpnp.add() doesn't work properly with a scalar (#1288)

* dpnp.add() doesn't work properly with a scalar

* get rid of dpctl.SyclQueue() call in tests with unsupported device keyword

* Add a fix for crash on CPU device

* USM type in operations with a scalar

* Porting fix for crash to logic kernel

* Add dlpack support with tests and docstrings

* Add a test for dlpack with dpt

* Fix remarks, add _create_from_usm_ndarray func and move tests to test_sycl_queue

* Use tril() and triu() function from dpctl.tensor (#1286)

* Use tril() function from dpctl.tensor

* Use triu() function from dpctl.tensor

* Changed tests for tril() and triu() functions.

* Skip tests for tril() and triu() functions with usm_type.

* dpnp.subtract() doesn't work properly with a scalar (#1292)

* dpnp.add() doesn't work properly with a scalar

* dpnp.subtract() doesn't work properly with a scalar

* USM type in operations with a scalar

* Rollback excluded 'floor_divide' tests from skip scope

* Explicit vector operations instead of saturation functions

* Use std::int32_t and std::int64_t types

* Tune tail's loop of kernel for the vector op

* dpnp.divide() doesn't work properly with a scalar (#1295)

* dpnp.add() doesn't work properly with a scalar

* dpnp.subtract() doesn't work properly with a scalar

* dpnp.divide() doesn't work properly with a scalar

* dpnp.divide() doesn't work properly with a scalar

* Use std::int32_t and std::int64_t types

* Disable floating-point optimizations that assume arguments and results are not NaNs or +-Inf

* Fix issue with divide on Iris Xe

* Update example3 building from debug build script (#1298)

* Remove temporary solution accepting CFD with equal SYCL context instead of queue (#1303)

* Intel LLVM is to use conda's gcc toolchain, sysroot and target libraries (#1306)

* Tests are crashing if no default device (#1311)

* Setting version to 0.11.1 (#1308)

---------

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Co-authored-by: Natalia Polina <natalia.polina@intel.com>
Co-authored-by: Vahid Tavanashad <vahid.tavanashad@intel.com>
Co-authored-by: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
---
 .github/workflows/conda-package.yml           |  11 +-
 0.build.sh                                    |  13 +-
 conda-recipe/build.sh                         |   5 +
 conda-recipe/meta.yaml                        |   8 +-
 doc/conf.py                                   |   2 +-
 dpnp/backend/CMakeLists.txt                   |  23 +-
 dpnp/backend/doc/Doxyfile                     |   2 +-
 .../include/dpnp_gen_1arg_1type_tbl.hpp       |  69 ++-
 .../include/dpnp_gen_2arg_2type_tbl.hpp       |  99 ++++
 .../include/dpnp_gen_2arg_3type_tbl.hpp       |  93 ++-
 dpnp/backend/include/dpnp_iface.hpp           |  30 +-
 dpnp/backend/include/dpnp_iface_fptr.hpp      |  41 +-
 .../kernels/dpnp_krnl_arraycreation.cpp       |  46 +-
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp   | 561 ++++++++----------
 dpnp/backend/kernels/dpnp_krnl_fft.cpp        |   8 +-
 dpnp/backend/kernels/dpnp_krnl_indexing.cpp   |   3 +-
 dpnp/backend/kernels/dpnp_krnl_logic.cpp      | 457 +++++++++++++-
 dpnp/backend/kernels/dpnp_krnl_random.cpp     |  14 +-
 dpnp/backend/src/dpnp_fptr.hpp                | 119 +++-
 dpnp/backend/src/dpnp_utils.hpp               |  10 +-
 dpnp/backend/src/dpnpc_memory_adapter.hpp     |   6 +-
 dpnp/backend/src/queue_sycl.cpp               |   7 +-
 dpnp/backend/src/queue_sycl.hpp               |  11 +-
 dpnp/dparray.pyx                              |  10 +-
 dpnp/dpnp_algo/dpnp_algo.pxd                  |  18 +-
 dpnp/dpnp_algo/dpnp_algo.pyx                  |  24 +-
 dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx    | 128 +---
 dpnp/dpnp_algo/dpnp_algo_logic.pyx            | 185 ++----
 dpnp/dpnp_array.py                            |  44 +-
 dpnp/dpnp_container.py                        |  44 +-
 dpnp/dpnp_iface.py                            |  66 ++-
 dpnp/dpnp_iface_arraycreation.py              | 138 +++--
 dpnp/dpnp_iface_logic.py                      | 559 ++++++++++++-----
 dpnp/dpnp_iface_mathematical.py               | 252 ++++----
 dpnp/dpnp_iface_statistics.py                 |   6 +-
 dpnp/dpnp_iface_types.py                      |  14 +-
 dpnp/dpnp_utils/dpnp_algo_utils.pxd           |   4 +-
 dpnp/dpnp_utils/dpnp_algo_utils.pyx           |  68 ++-
 dpnp/random/dpnp_iface_random.py              | 432 +++++++++++---
 dpnp/random/dpnp_random_state.py              |  13 +-
 dpnp/version.py                               |   4 +-
 examples/example4.py                          |   8 +-
 tests/conftest.py                             |  21 +-
 tests/helper.py                               |  39 ++
 tests/skipped_tests.tbl                       |  26 +-
 tests/skipped_tests_gpu.tbl                   |  33 +-
 tests/test_arraycreation.py                   | 389 ++++--------
 tests/test_dparray.py                         | 159 ++++-
 tests/test_indexing.py                        |  55 +-
 tests/test_linalg.py                          |  72 ++-
 tests/test_logic.py                           | 147 ++++-
 tests/test_mathematical.py                    | 234 +++++---
 tests/test_random.py                          |   1 -
 tests/test_random_state.py                    |  37 +-
 tests/test_strides.py                         |  68 +--
 tests/test_sycl_queue.py                      | 198 +++++--
 tests/test_usm_type.py                        |  78 ++-
 .../cupy/creation_tests/test_basic.py         |   2 +-
 .../cupy/creation_tests/test_from_data.py     |   1 +
 .../cupy/creation_tests/test_matrix.py        |  11 +-
 .../cupy/creation_tests/test_ranges.py        |   2 +-
 .../cupy/indexing_tests/test_generate.py      |   2 +-
 .../cupy/indexing_tests/test_insert.py        |   2 +-
 .../cupy/logic_tests/test_comparison.py       |   2 -
 .../third_party/cupy/logic_tests/test_ops.py  |   4 -
 .../cupy/math_tests/test_arithmetic.py        |  29 +-
 .../cupy/random_tests/test_sample.py          |   2 -
 .../cupy/statistics_tests/test_meanvar.py     |   5 +-
 tests_external/skipped_tests_numpy.tbl        |  77 ---
 utils/command_build_clib.py                   |   4 +-
 70 files changed, 3460 insertions(+), 1895 deletions(-)
 create mode 100644 dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp
 create mode 100644 tests/helper.py

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 07bd44c8fd10..da29bf31dd09 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -29,11 +29,8 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         os: [ubuntu-20.04, windows-latest]
-        include:
-          - python: '3.10'
-            os: ubuntu-20.04
 
     runs-on: ${{ matrix.os }}
 
@@ -110,7 +107,7 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         os: [ubuntu-20.04, ubuntu-latest]
 
         experimental: [false]
@@ -215,7 +212,7 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         experimental: [false]
 
     continue-on-error: ${{ matrix.experimental }}
@@ -384,7 +381,7 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         os: [ubuntu-20.04, windows-latest]
 
     runs-on: ${{ matrix.os }}
diff --git a/0.build.sh b/0.build.sh
index 380214e6802c..b1a2a29ec0ae 100755
--- a/0.build.sh
+++ b/0.build.sh
@@ -4,6 +4,14 @@ THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]}))
 # . ${THEDIR}/0.env.sh
 cd ${THEDIR}
 
+# Point the $TMP env variable at the directory where this script is located.
+# The compiler uses this variable as the path to its temporary folder,
+# where it stores temporary files generated during the compilation and linking phases.
+# By default the compiler uses the /tmp folder, but its size is limited and
+# there might not be enough space to temporarily keep all generated data.
+export TMP=${THEDIR}
+
+
 export DPNP_DEBUG=1
 
 python setup.py clean
@@ -17,7 +25,8 @@ CC=icpx python setup.py build_ext --inplace
 
 echo
 echo =========example3==============
-icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3
+DPCTL_INCLUDES=$(python -m dpctl --includes)
+icpx -fsycl -g -O0 -ggdb3 -fPIC dpnp/backend/examples/example3.cpp $DPCTL_INCLUDES -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3
 # LD_DEBUG=libs,bindings,symbols ./example3
 ./example3
 
@@ -39,7 +48,7 @@ icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/in
 # strings /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so | grep GLIBCXX | sort -n
 
 
-# echo
+echo
 echo =========example1==============
 # LD_DEBUG=libs,bindings,symbols python examples/example1.py
 # LD_DEBUG=libs python examples/example1.py
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index d873320f80f8..164ad09d578f 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -29,6 +29,11 @@ fi
 export CFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $CFLAGS"
 export LDFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $LDFLAGS"
 
+# Intel LLVM must cooperate with compiler and sysroot from conda
+echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg
+export ICPXCFG="$(pwd)/icpx_for_conda.cfg"
+export ICXCFG="$(pwd)/icpx_for_conda.cfg"
+
 $PYTHON setup.py build_clib
 $PYTHON setup.py build_ext install
 
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index b384776d2607..0c6e38f667db 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -11,17 +11,17 @@ requirements:
       - numpy 1.19
       - cython
       - cmake >=3.19
-      - dpctl >=0.13
-      - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2021.1.1') }}
+      - dpctl >=0.14
+      - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }}
       - onedpl-devel
       - tbb-devel
       - wheel
     build:
       - {{ compiler('cxx') }}
-      - {{ compiler('dpcpp') }}  >=2022.1  # [not osx]
+      - {{ compiler('dpcpp') }}  >=2023.0  # [not osx]
     run:
       - python
-      - dpctl >=0.13
+      - dpctl >=0.14
       - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }}
diff --git a/doc/conf.py b/doc/conf.py
index 46505fa8f6db..999b2504bd64 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -33,7 +33,7 @@
 # The short X.Y version
 version = '0.11'
 # The full version, including alpha/beta/rc tags
-release = '0.11.0'
+release = '0.11.1'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 1714124cf85a..baee709b11ee 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -1,5 +1,5 @@
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
 
 cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
-# set(DPNP_VERSION 0.11.0)
+# set(DPNP_VERSION 0.11.1)
 # set(DPNP_API_VERSION 0.11)
 
 # set directory where the custom finders live
@@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS
   "-fsycl "
   "-fsycl-device-code-split=per_kernel "
   "-fno-approx-func "
+  "-fno-finite-math-only "
 )
 string(CONCAT COMMON_LINK_FLAGS
   "-fsycl "
@@ -111,7 +112,7 @@ elseif(WIN32)
   # set(CMAKE_RANLIB "llvm-ranlib")
   # set(CMAKE_CXX_FLAGS "/EHsc")
 
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "/EHsc "
 #    "/Ox "
 #    "/W3 "
@@ -133,23 +134,29 @@ string(CONCAT DPNP_WARNING_FLAGS
   "-Wextra "
   "-Wshadow "
   "-Wall "
-  "-Wstring-prototypes "
+  "-Wstrict-prototypes "
   "-Wformat "
   "-Wformat-security "
 )
-string(APPEND COMMON_COMPILER_FLAGS
+string(APPEND COMMON_COMPILE_FLAGS
   "${DPNP_WARNING_FLAGS}"
 )
 
 # debug/release compile definitions
 if(DPNP_DEBUG_ENABLE)
   set(CMAKE_BUILD_TYPE "Debug")
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "-O0 "
+    "-ggdb3 "
+  )
+  string(APPEND COMMON_LINK_FLAGS
+    "-O0 "
+    "-ggdb3 "
+    "-fsycl-link-huge-device-code "
   )
 else()
   set(CMAKE_BUILD_TYPE "Release")
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "-O3 "
   )
 endif()
@@ -162,7 +169,7 @@ string(CONCAT DPNP_DEFS
   "-D_FORTIFY_SOURCE=2 "
 )
 if(NOT WIN32)
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "-fno-delete-null-pointer-checks "
     "-fstack-protector-strong "
     "-fno-strict-overflow "
diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile
index 6c83bb0e8465..3d6c971a7991 100644
--- a/dpnp/backend/doc/Doxyfile
+++ b/dpnp/backend/doc/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "DPNP C++ backend kernel library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 0.11.0
+PROJECT_NUMBER         = 0.11.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
index f5ee23d755f2..0f6cb5b31deb 100644
--- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -23,6 +23,8 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
+#if defined(MACRO_1ARG_1TYPE_OP)
+
 /*
  * This header file contains single argument element wise functions definitions
  *
@@ -35,10 +37,6 @@
  *
  */
 
-#ifndef MACRO_1ARG_1TYPE_OP
-#error "MACRO_1ARG_1TYPE_OP is not defined"
-#endif
-
 #ifdef _SECTION_DOCUMENTATION_GENERATION_
 
 #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__)                                                   \
@@ -88,7 +86,7 @@
                   const shape_elem_type* input1_strides,                                                                \
                   const size_t* where);
 
-#endif
+#endif // _SECTION_DOCUMENTATION_GENERATION_
 
 MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func))
@@ -107,3 +105,62 @@ MACRO_1ARG_1TYPE_OP(dpnp_square_c,
                     oneapi::mkl::vm::sqr(q, input1_size, input1_data, result))
 
 #undef MACRO_1ARG_1TYPE_OP
+
+#elif defined(MACRO_1ARG_1TYPE_LOGIC_OP)
+
+/*
+ * This header file contains single argument element wise functions definitions
+ *
+ * Macro `MACRO_1ARG_1TYPE_LOGIC_OP` must be defined before usage
+ *
+ * Parameters:
+ * - public name of the function and kernel name
+ * - operation used to calculate the result
+ *
+ */
+
+#ifdef _SECTION_DOCUMENTATION_GENERATION_
+
+#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__)                                                             \
+    /** @ingroup BACKEND_API                                                                                         */ \
+    /** @brief Per element operation function __name__                                                               */ \
+    /**                                                                                                              */ \
+    /** Function "__name__" executes operator "__operation__" over corresponding elements of input array             */ \
+    /**                                                                                                              */ \
+    /** @param[in]  q_ref              Reference to SYCL queue.                                                      */ \
+    /** @param[out] result_out         Output array.                                                                 */ \
+    /** @param[in]  result_size        Output array size.                                                            */ \
+    /** @param[in]  result_ndim        Number of output array dimensions.                                            */ \
+    /** @param[in]  result_shape       Output array shape.                                                           */ \
+    /** @param[in]  result_strides     Output array strides.                                                         */ \
+    /** @param[in]  input1_in          Input array 1.                                                                */ \
+    /** @param[in]  input1_size        Input array 1 size.                                                           */ \
+    /** @param[in]  input1_ndim        Number of input array 1 dimensions.                                           */ \
+    /** @param[in]  input1_shape       Input array 1 shape.                                                          */ \
+    /** @param[in]  input1_strides     Input array 1 strides.                                                        */ \
+    /** @param[in]  where              Where condition.                                                              */ \
+    /** @param[in]  dep_event_vec_ref  Reference to vector of SYCL events.                                           */ \
+    template <typename _DataType_input1>                                                                                \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                 \
+                               void* result_out,                                                                        \
+                               const size_t result_size,                                                                \
+                               const size_t result_ndim,                                                                \
+                               const shape_elem_type* result_shape,                                                     \
+                               const shape_elem_type* result_strides,                                                   \
+                               const void* input1_in,                                                                   \
+                               const size_t input1_size,                                                                \
+                               const size_t input1_ndim,                                                                \
+                               const shape_elem_type* input1_shape,                                                     \
+                               const shape_elem_type* input1_strides,                                                   \
+                               const size_t* where,                                                                     \
+                               const DPCTLEventVectorRef dep_event_vec_ref);
+
+#endif // _SECTION_DOCUMENTATION_GENERATION_
+
+MACRO_1ARG_1TYPE_LOGIC_OP(dpnp_logical_not_c, !input1_elem)
+
+#undef MACRO_1ARG_1TYPE_LOGIC_OP
+
+#else
+#error "MACRO_1ARG_1TYPE_OP or MACRO_1ARG_1TYPE_LOGIC_OP is not defined"
+#endif // MACRO_1ARG_1TYPE_OP || MACRO_1ARG_1TYPE_LOGIC_OP
diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp
new file mode 100644
index 000000000000..4b6c4290ef31
--- /dev/null
+++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp
@@ -0,0 +1,99 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+/*
+ * This header file contains two argument element wise functions definitions
+ *
+ * Macro `MACRO_2ARG_2TYPES_LOGIC_OP` must be defined before usage
+ *
+ * Parameters:
+ * - public name of the function and kernel name
+ * - operation used to calculate the result
+ *
+ */
+
+#ifndef MACRO_2ARG_2TYPES_LOGIC_OP
+#error "MACRO_2ARG_2TYPES_LOGIC_OP is not defined"
+#endif
+
+#ifdef _SECTION_DOCUMENTATION_GENERATION_
+
+#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__)                                                             \
+    /** @ingroup BACKEND_API                                                                                         */ \
+    /** @brief Per element operation function __name__                                                               */ \
+    /**                                                                                                              */ \
+    /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays            */ \
+    /**                                                                                                              */ \
+    /** @param[in]  q_ref              Reference to SYCL queue.                                                      */ \
+    /** @param[out] result_out         Output array.                                                                 */ \
+    /** @param[in]  result_size        Output array size.                                                            */ \
+    /** @param[in]  result_ndim        Number of output array dimensions.                                            */ \
+    /** @param[in]  result_shape       Output array shape.                                                           */ \
+    /** @param[in]  result_strides     Output array strides.                                                         */ \
+    /** @param[in]  input1_in          Input array 1.                                                                */ \
+    /** @param[in]  input1_size        Input array 1 size.                                                           */ \
+    /** @param[in]  input1_ndim        Number of input array 1 dimensions.                                           */ \
+    /** @param[in]  input1_shape       Input array 1 shape.                                                          */ \
+    /** @param[in]  input1_strides     Input array 1 strides.                                                        */ \
+    /** @param[in]  input2_in          Input array 2.                                                                */ \
+    /** @param[in]  input2_size        Input array 2 size.                                                           */ \
+    /** @param[in]  input2_ndim        Number of input array 2 dimensions.                                           */ \
+    /** @param[in]  input2_shape       Input array 2 shape.                                                          */ \
+    /** @param[in]  input2_strides     Input array 2 strides.                                                        */ \
+    /** @param[in]  where              Where condition.                                                              */ \
+    /** @param[in]  dep_event_vec_ref  Reference to vector of SYCL events.                                           */ \
+    template <typename _DataType_input1, typename _DataType_input2>                                                     \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                 \
+                               void* result_out,                                                                        \
+                               const size_t result_size,                                                                \
+                               const size_t result_ndim,                                                                \
+                               const shape_elem_type* result_shape,                                                     \
+                               const shape_elem_type* result_strides,                                                   \
+                               const void* input1_in,                                                                   \
+                               const size_t input1_size,                                                                \
+                               const size_t input1_ndim,                                                                \
+                               const shape_elem_type* input1_shape,                                                     \
+                               const shape_elem_type* input1_strides,                                                   \
+                               const void* input2_in,                                                                   \
+                               const size_t input2_size,                                                                \
+                               const size_t input2_ndim,                                                                \
+                               const shape_elem_type* input2_shape,                                                     \
+                               const shape_elem_type* input2_strides,                                                   \
+                               const size_t* where,                                                                     \
+                               const DPCTLEventVectorRef dep_event_vec_ref);
+
+#endif
+
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_equal_c, input1_elem == input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_c, input1_elem > input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_equal_c, input1_elem >= input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_c, input1_elem < input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_and_c, input1_elem && input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_or_c, input1_elem || input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_xor_c, (!!input1_elem) != (!!input2_elem))
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_not_equal_c, input1_elem != input2_elem)
+
+#undef MACRO_2ARG_2TYPES_LOGIC_OP
diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
index 5d4ae22f796f..e345c6eefea7 100644
--- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,10 @@
  * Parameters:
  * - public name of the function and kernel name
  * - operation used to calculate the result
+ * - vector operation over SYCL group used to calculate the result
+ * - list of types vector operation accepts
  * - mkl operation used to calculate the result
+ * - list of types mkl operation accepts
  *
  */
 
@@ -41,11 +44,12 @@
 
 #ifdef _SECTION_DOCUMENTATION_GENERATION_
 
-#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__)                                                  \
+#define MACRO_2ARG_3TYPES_OP(                                                                                           \
+    __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                        \
     /** @ingroup BACKEND_API                                                                                         */ \
     /** @brief Per element operation function __name__                                                               */ \
     /**                                                                                                              */ \
-    /** Function "__name__" executes operator "__operation1__" over corresponding elements of input arrays           */ \
+    /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays            */ \
     /**                                                                                                              */ \
     /** @param[in]  q_ref              Reference to SYCL queue.                                                      */ \
     /** @param[out] result_out         Output array.                                                                 */ \
@@ -105,23 +109,84 @@
 
 #endif
 
-MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, oneapi::mkl::vm::add)
-MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, sycl::atan2((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::atan2)
+MACRO_2ARG_3TYPES_OP(dpnp_add_c,
+                     input1_elem + input2_elem,
+                     x1 + x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::add,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
+
+MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c,
+                     sycl::atan2((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::atan2,
+                     MACRO_UNPACK_TYPES(float, double))
+
 MACRO_2ARG_3TYPES_OP(dpnp_copysign_c,
                      sycl::copysign((double)input1_elem, (double)input2_elem),
-                     oneapi::mkl::vm::copysign)
-MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, oneapi::mkl::vm::div)
-MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::fmod)
-MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, sycl::hypot((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::hypot)
-MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, sycl::max(input1_elem, input2_elem), oneapi::mkl::vm::fmax)
-MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, sycl::min(input1_elem, input2_elem), oneapi::mkl::vm::fmin)
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::copysign,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_divide_c,
+                     input1_elem / input2_elem,
+                     x1 / x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::div,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
+
+MACRO_2ARG_3TYPES_OP(dpnp_fmod_c,
+                     sycl::fmod((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::fmod,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_hypot_c,
+                     sycl::hypot((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::hypot,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_maximum_c,
+                     sycl::max(input1_elem, input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::fmax,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_minimum_c,
+                     sycl::min(input1_elem, input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::fmin,
+                     MACRO_UNPACK_TYPES(float, double))
 
 // "multiply" needs to be standalone kernel (not autogenerated) due to complex algorithm. This is not an element wise.
 // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3"
 // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10]
-MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem* input2_elem, oneapi::mkl::vm::mul)
+MACRO_2ARG_3TYPES_OP(dpnp_multiply_c,
+                     input1_elem * input2_elem,
+                     x1 * x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::mul,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
+
+MACRO_2ARG_3TYPES_OP(dpnp_power_c,
+                     sycl::pow((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::pow,
+                     MACRO_UNPACK_TYPES(float, double))
 
-MACRO_2ARG_3TYPES_OP(dpnp_power_c, sycl::pow((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::pow)
-MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, oneapi::mkl::vm::sub)
+MACRO_2ARG_3TYPES_OP(dpnp_subtract_c,
+                     input1_elem - input2_elem,
+                     x1 - x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::sub,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
 
 #undef MACRO_2ARG_3TYPES_OP
diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp
index 42c05f0fd61d..7a80b40a3d2e 100644
--- a/dpnp/backend/include/dpnp_iface.hpp
+++ b/dpnp/backend/include/dpnp_iface.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,7 @@ INP_DLLEXPORT void dpnp_queue_initialize_c(QueueOptions selector = QueueOptions:
  * @ingroup BACKEND_API
  * @brief SYCL queue device status.
  *
- * Return 1 if current @ref queue is related to cpu or host device. return 0 otherwise.
+ * Return 1 if the current @ref queue is related to a CPU device, return 0 otherwise.
  */
 INP_DLLEXPORT size_t dpnp_queue_is_cpu_c();
 
@@ -1806,7 +1806,31 @@ INP_DLLEXPORT void dpnp_invert_c(void* array1_in, void* result, size_t size);
 
 #include <dpnp_gen_1arg_2type_tbl.hpp>
 
-#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__)                                                 \
+#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__)                                                           \
+    template <typename _DataType_input1, typename _DataType_input2>                                                    \
+    INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                  \
+                                             void* result_out,                                                         \
+                                             const size_t result_size,                                                 \
+                                             const size_t result_ndim,                                                 \
+                                             const shape_elem_type* result_shape,                                      \
+                                             const shape_elem_type* result_strides,                                    \
+                                             const void* input1_in,                                                    \
+                                             const size_t input1_size,                                                 \
+                                             const size_t input1_ndim,                                                 \
+                                             const shape_elem_type* input1_shape,                                      \
+                                             const shape_elem_type* input1_strides,                                    \
+                                             const void* input2_in,                                                    \
+                                             const size_t input2_size,                                                 \
+                                             const size_t input2_ndim,                                                 \
+                                             const shape_elem_type* input2_shape,                                      \
+                                             const shape_elem_type* input2_strides,                                    \
+                                             const size_t* where,                                                      \
+                                             const DPCTLEventVectorRef dep_event_vec_ref);
+
+#include <dpnp_gen_2arg_2type_tbl.hpp>
+
+#define MACRO_2ARG_3TYPES_OP(                                                                                          \
+    __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                       \
     template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>                         \
     INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                  \
                                              void* result_out,                                                         \
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index 8e209d38317a..fb154fcabfac 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -151,10 +151,10 @@ enum class DPNPFuncName : size_t
     DPNP_FN_EIG_EXT,                      /**< Used in numpy.linalg.eig() impl, requires extra parameters */
     DPNP_FN_EIGVALS,                      /**< Used in numpy.linalg.eigvals() impl  */
     DPNP_FN_EIGVALS_EXT,                  /**< Used in numpy.linalg.eigvals() impl, requires extra parameters */
+    DPNP_FN_EQUAL_EXT,                    /**< Used in numpy.equal() impl, requires extra parameters */
     DPNP_FN_ERF,                          /**< Used in scipy.special.erf impl  */
     DPNP_FN_ERF_EXT,                      /**< Used in scipy.special.erf impl, requires extra parameters */
     DPNP_FN_EYE,                          /**< Used in numpy.eye() impl  */
-    DPNP_FN_EYE_EXT,                      /**< Used in numpy.eye() impl, requires extra parameters */
     DPNP_FN_EXP,                          /**< Used in numpy.exp() impl  */
     DPNP_FN_EXP_EXT,                      /**< Used in numpy.exp() impl, requires extra parameters */
     DPNP_FN_EXP2,                         /**< Used in numpy.exp2() impl  */
@@ -179,6 +179,8 @@ enum class DPNPFuncName : size_t
     DPNP_FN_FMOD_EXT,                     /**< Used in numpy.fmod() impl, requires extra parameters  */
     DPNP_FN_FULL,                         /**< Used in numpy.full() impl  */
     DPNP_FN_FULL_LIKE,                    /**< Used in numpy.full_like() impl  */
+    DPNP_FN_GREATER_EXT,                  /**< Used in numpy.greater() impl, requires extra parameters */
+    DPNP_FN_GREATER_EQUAL_EXT,            /**< Used in numpy.greater_equal() impl, requires extra parameters */
     DPNP_FN_HYPOT,                        /**< Used in numpy.hypot() impl  */
     DPNP_FN_HYPOT_EXT,                    /**< Used in numpy.hypot() impl, requires extra parameters  */
     DPNP_FN_IDENTITY,                     /**< Used in numpy.identity() impl  */
@@ -193,6 +195,8 @@ enum class DPNPFuncName : size_t
     DPNP_FN_KRON_EXT,                     /**< Used in numpy.kron() impl, requires extra parameters  */
     DPNP_FN_LEFT_SHIFT,                   /**< Used in numpy.left_shift() impl  */
     DPNP_FN_LEFT_SHIFT_EXT,               /**< Used in numpy.left_shift() impl, requires extra parameters  */
+    DPNP_FN_LESS_EXT,                     /**< Used in numpy.less() impl, requires extra parameters */
+    DPNP_FN_LESS_EQUAL_EXT,               /**< Used in numpy.less_equal() impl, requires extra parameters */
     DPNP_FN_LOG,                          /**< Used in numpy.log() impl  */
     DPNP_FN_LOG_EXT,                      /**< Used in numpy.log() impl, requires extra parameters  */
     DPNP_FN_LOG10,                        /**< Used in numpy.log10() impl  */
@@ -201,6 +205,10 @@ enum class DPNPFuncName : size_t
     DPNP_FN_LOG2_EXT,                     /**< Used in numpy.log2() impl, requires extra parameters  */
     DPNP_FN_LOG1P,                        /**< Used in numpy.log1p() impl  */
     DPNP_FN_LOG1P_EXT,                    /**< Used in numpy.log1p() impl, requires extra parameters  */
+    DPNP_FN_LOGICAL_AND_EXT,              /**< Used in numpy.logical_and() impl, requires extra parameters */
+    DPNP_FN_LOGICAL_NOT_EXT,              /**< Used in numpy.logical_not() impl, requires extra parameters */
+    DPNP_FN_LOGICAL_OR_EXT,               /**< Used in numpy.logical_or() impl, requires extra parameters */
+    DPNP_FN_LOGICAL_XOR_EXT,              /**< Used in numpy.logical_xor() impl, requires extra parameters */
     DPNP_FN_MATMUL,                       /**< Used in numpy.matmul() impl  */
     DPNP_FN_MATMUL_EXT,                   /**< Used in numpy.matmul() impl, requires extra parameters */
     DPNP_FN_MATRIX_RANK,                  /**< Used in numpy.linalg.matrix_rank() impl  */
@@ -227,6 +235,7 @@ enum class DPNPFuncName : size_t
     DPNP_FN_NEGATIVE_EXT,                 /**< Used in numpy.negative() impl, requires extra parameters */
     DPNP_FN_NONZERO,                      /**< Used in numpy.nonzero() impl  */
     DPNP_FN_NONZERO_EXT,                  /**< Used in numpy.nonzero() impl, requires extra parameters */
+    DPNP_FN_NOT_EQUAL_EXT,                /**< Used in numpy.not_equal() impl, requires extra parameters */
     DPNP_FN_ONES,                         /**< Used in numpy.ones() impl */
     DPNP_FN_ONES_LIKE,                    /**< Used in numpy.ones_like() impl */
     DPNP_FN_PARTITION,                    /**< Used in numpy.partition() impl */
@@ -361,9 +370,7 @@ enum class DPNPFuncName : size_t
     DPNP_FN_TRI,                          /**< Used in numpy.tri() impl  */
     DPNP_FN_TRI_EXT,                      /**< Used in numpy.tri() impl, requires extra parameters */
     DPNP_FN_TRIL,                         /**< Used in numpy.tril() impl  */
-    DPNP_FN_TRIL_EXT,                     /**< Used in numpy.tril() impl, requires extra parameters */
     DPNP_FN_TRIU,                         /**< Used in numpy.triu() impl  */
-    DPNP_FN_TRIU_EXT,                     /**< Used in numpy.triu() impl, requires extra parameters */
     DPNP_FN_TRUNC,                        /**< Used in numpy.trunc() impl  */
     DPNP_FN_TRUNC_EXT,                    /**< Used in numpy.trunc() impl, requires extra parameters */
     DPNP_FN_VANDER,                       /**< Used in numpy.vander() impl  */
@@ -385,13 +392,13 @@ enum class DPNPFuncName : size_t
 enum class DPNPFuncType : size_t
 {
     DPNP_FT_NONE,     /**< Very first element of the enumeration */
+    DPNP_FT_BOOL,     /**< analog of numpy.bool_ or bool */
     DPNP_FT_INT,      /**< analog of numpy.int32 or int */
     DPNP_FT_LONG,     /**< analog of numpy.int64 or long */
     DPNP_FT_FLOAT,    /**< analog of numpy.float32 or float */
     DPNP_FT_DOUBLE,   /**< analog of numpy.float32 or double */
     DPNP_FT_CMPLX64,  /**< analog of numpy.complex64 or std::complex<float> */
-    DPNP_FT_CMPLX128, /**< analog of numpy.complex128 or std::complex<double> */
-    DPNP_FT_BOOL      /**< analog of numpy.bool or numpy.bool_ or bool */
+    DPNP_FT_CMPLX128  /**< analog of numpy.complex128 or std::complex<double> */
 };
 
 /**
@@ -410,8 +417,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs);
  */
 typedef struct DPNPFuncData
 {
-    DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */
-    void* ptr;                /**< C++ backend function pointer */
+    DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* ptr_no_fp64)
+        : return_type(gen_type) // general (type, pointer) pair first, then the no-fp64 alternative pair
+        , ptr(gen_ptr)
+        , return_type_no_fp64(type_no_fp64)
+        , ptr_no_fp64(ptr_no_fp64) // the argument shadows the member here: member is set from the argument
+    {
+    }
+    DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr) // no-fp64 alternative left as DPNP_FT_NONE / nullptr
+        : DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr)
+    {
+    }
+    DPNPFuncData() // empty entry: DPNP_FT_NONE return type and null function pointer
+        : DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr)
+    {
+    }
+
+    DPNPFuncType return_type;         /**< return type identifier expected by the @ref ptr function */
+    void* ptr;                        /**< C++ backend function pointer */
+    DPNPFuncType return_type_no_fp64; /**< alternative return type identifier for devices without fp64 support */
+    void* ptr_no_fp64;                /**< alternative C++ backend function pointer for devices without fp64 support */
 } DPNPFuncData_t;
 
 /**
diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
index 71d93842feb0..a29fcca0975b 100644
--- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -292,13 +292,6 @@ void dpnp_eye_c(void* result1, int k, const shape_elem_type* res_shape)
 template <typename _DataType>
 void (*dpnp_eye_default_c)(void*, int, const shape_elem_type*) = dpnp_eye_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_eye_ext_c)(DPCTLSyclQueueRef,
-                                    void*,
-                                    int,
-                                    const shape_elem_type*,
-                                    const DPCTLEventVectorRef) = dpnp_eye_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_full_c(DPCTLSyclQueueRef q_ref,
                               void* array_in,
@@ -1062,17 +1055,6 @@ void (*dpnp_tril_default_c)(void*,
                             const size_t,
                             const size_t) = dpnp_tril_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_tril_ext_c)(DPCTLSyclQueueRef,
-                                     void*,
-                                     void*,
-                                     const int,
-                                     shape_elem_type*,
-                                     shape_elem_type*,
-                                     const size_t,
-                                     const size_t,
-                                     const DPCTLEventVectorRef) = dpnp_tril_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_triu_c(DPCTLSyclQueueRef q_ref,
                               void* array_in,
@@ -1225,17 +1207,6 @@ void (*dpnp_triu_default_c)(void*,
                             const size_t,
                             const size_t) = dpnp_triu_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_triu_ext_c)(DPCTLSyclQueueRef,
-                                     void*,
-                                     void*,
-                                     const int,
-                                     shape_elem_type*,
-                                     shape_elem_type*,
-                                     const size_t,
-                                     const size_t,
-                                     const DPCTLEventVectorRef) = dpnp_triu_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_zeros_c(DPCTLSyclQueueRef q_ref,
                                void* result,
@@ -1319,11 +1290,6 @@ void func_map_init_arraycreation(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_EYE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_EYE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_eye_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_eye_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_FULL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_full_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_FULL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_full_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_FULL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_full_default_c<float>};
@@ -1451,21 +1417,11 @@ void func_map_init_arraycreation(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_TRIL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_TRIL][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_tril_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_tril_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_zeros_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_zeros_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_zeros_default_c<float>};
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index eafa50d4cee2..5133473d3935 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -825,7 +825,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     return;
 }
 
-#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__)                                                 \
+
+#define MACRO_2ARG_3TYPES_OP(                                                                                          \
+    __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                       \
     template <typename _KernelNameSpecialization1,                                                                     \
               typename _KernelNameSpecialization2,                                                                     \
               typename _KernelNameSpecialization3>                                                                     \
@@ -834,6 +836,11 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     template <typename _KernelNameSpecialization1,                                                                     \
               typename _KernelNameSpecialization2,                                                                     \
               typename _KernelNameSpecialization3>                                                                     \
+    class __name__##_sg_kernel;                                                                                        \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2,                                                                     \
+              typename _KernelNameSpecialization3>                                                                     \
     class __name__##_broadcast_kernel;                                                                                 \
                                                                                                                        \
     template <typename _KernelNameSpecialization1,                                                                     \
@@ -874,45 +881,23 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType_input1> input1_ptr(q_ref, input1_in, input1_size);                                 \
-        DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true);               \
-        DPNPC_ptr_adapter<_DataType_input2> input2_ptr(q_ref, input2_in, input2_size);                                 \
-        DPNPC_ptr_adapter<shape_elem_type> input2_shape_ptr(q_ref, input2_shape, input2_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input2_strides_ptr(q_ref, input2_strides, input2_ndim, true);               \
-                                                                                                                       \
-        DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size, false, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> result_shape_ptr(q_ref, result_shape, result_ndim);                         \
-        DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim);                     \
-                                                                                                                       \
-        _DataType_input1* input1_data = input1_ptr.get_ptr();                                                          \
-        shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr();                                           \
+        _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast<void*>(input1_in));                  \
+        _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast<void*>(input2_in));                  \
+        _DataType_output* result = static_cast<_DataType_output*>(result_out);                                         \
                                                                                                                        \
-        _DataType_input2* input2_data = input2_ptr.get_ptr();                                                          \
-        shape_elem_type* input2_shape_data = input2_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input2_strides_data = input2_strides_ptr.get_ptr();                                           \
+        bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);                    \
                                                                                                                        \
-        _DataType_output* result = result_ptr.get_ptr();                                                               \
-        shape_elem_type* result_shape_data = result_shape_ptr.get_ptr();                                               \
-        shape_elem_type* result_strides_data = result_strides_ptr.get_ptr();                                           \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
-        bool use_broadcasting = !array_equal(input1_shape_data, input1_ndim, input2_shape_data, input2_ndim);          \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
                                                                                                                        \
-        const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input1_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input1_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets);                              \
-        bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim);          \
-        sycl::free(input1_shape_offsets, q);                                                                           \
+        shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim];                                      \
                                                                                                                        \
-        const size_t input2_shape_size_in_bytes = input2_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input2_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input2_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input2_shape_data, input2_ndim, input2_shape_offsets);                              \
-        use_strides =                                                                                                  \
-            use_strides || !array_equal(input2_strides_data, input2_ndim, input2_shape_offsets, input2_ndim);          \
-        sycl::free(input2_shape_offsets, q);                                                                           \
+        get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets);                                   \
+        use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim);     \
+        delete[] input2_shape_offsets;                                                                                 \
                                                                                                                        \
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
@@ -921,28 +906,26 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
         {                                                                                                              \
             DPNPC_id<_DataType_input1>* input1_it;                                                                     \
             const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);                                 \
-            input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref,                       \
-                                                                                          input1_it_size_in_bytes));   \
-            new (input1_it)                                                                                            \
-                DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape_data, input1_strides_data, input1_ndim);   \
+            input1_it =                                                                                                \
+                reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));    \
+            new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \
                                                                                                                        \
-            input1_it->broadcast_to_shape(result_shape_data, result_ndim);                                             \
+            input1_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
                                                                                                                        \
             DPNPC_id<_DataType_input2>* input2_it;                                                                     \
             const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);                                 \
-            input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref,                       \
-                                                                                          input2_it_size_in_bytes));   \
-            new (input2_it)                                                                                            \
-                DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape_data, input2_strides_data, input2_ndim);   \
+            input2_it =                                                                                                \
+                reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));    \
+            new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \
                                                                                                                        \
-            input2_it->broadcast_to_shape(result_shape_data, result_ndim);                                             \
+            input2_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
                                                                                                                        \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                            \
+                const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                          \
                 {                                                                                                      \
                     const _DataType_output input1_elem = (*input1_it)[i];                                              \
                     const _DataType_output input2_elem = (*input2_it)[i];                                              \
-                    result[i] = __operation1__;                                                                        \
+                    result[i] = __operation__;                                                                         \
                 }                                                                                                      \
             };                                                                                                         \
             auto kernel_func = [&](sycl::handler& cgh) {                                                               \
@@ -951,8 +934,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
                                                                                                                        \
-            event = q.submit(kernel_func);                                                                             \
-            event.wait();                                                                                              \
+            q.submit(kernel_func).wait();                                                                              \
                                                                                                                        \
             input1_it->~DPNPC_id();                                                                                    \
             input2_it->~DPNPC_id();                                                                                    \
@@ -961,11 +943,41 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
         }                                                                                                              \
         else if (use_strides)                                                                                          \
         {                                                                                                              \
+            if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with either input1 ndim=" + std::to_string(input1_ndim) +        \
+                                         " or input2 ndim=" + std::to_string(input2_ndim));                            \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization: a USM-host temporary speeds up the transfer to device */                  \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 3 * result_ndim;                                                                     \
+            shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary for packed strides allocated in USM-host memory */                                \
+            auto strides_host_packed =                                                                                 \
+                std::vector<shape_elem_type, usm_host_allocatorT>(strides_size, usm_host_allocatorT(q));               \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides, input1_strides and input2_strides */                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+            std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);    \
+                                                                                                                       \
+            auto copy_strides_ev =                                                                                     \
+                q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());     \
+                                                                                                                       \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                const size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                    \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
+                    /* dev_strides_data packs [result | input1 | input2] strides, result_ndim elements each */         \
+                    const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
+                                                                                                                       \
                     for (size_t i = 0; i < result_ndim; ++i)                                                           \
                     {                                                                                                  \
                         const size_t output_xyz_id =                                                                   \
@@ -976,34 +988,118 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
                     const _DataType_output input1_elem = input1_data[input1_id];                                       \
                     const _DataType_output input2_elem = input2_data[input2_id];                                       \
-                    result[output_id] = __operation1__;                                                                \
+                    result[output_id] = __operation__;                                                                 \
                 }                                                                                                      \
             };                                                                                                         \
             auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
                 cgh.parallel_for<                                                                                      \
                     class __name__##_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(            \
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
                                                                                                                        \
-            event = q.submit(kernel_func);                                                                             \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
-            if constexpr ((std::is_same<_DataType_input1, double>::value ||                                            \
-                           std::is_same<_DataType_input1, float>::value) &&                                            \
-                          std::is_same<_DataType_input2, _DataType_input1>::value)                                     \
+            if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __mkl_types__>)                      \
             {                                                                                                          \
-                event = __operation2__(q, result_size, input1_data, input2_data, result);                              \
+                event = __mkl_operation__(q, result_size, input1_data, input2_data, result);                           \
             }                                                                                                          \
-            else                                                                                                       \
+            else if constexpr (none_of_both_types<_DataType_input1,                                                    \
+                                                  _DataType_input2,                                                    \
+                                                  std::complex<float>,                                                 \
+                                                  std::complex<double>>)                                               \
             {                                                                                                          \
-                auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                           \
-                    const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                        \
+                constexpr size_t lws = 64;                                                                             \
+                constexpr unsigned int vec_sz = 8;                                                                     \
+                constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;        \
+                                                                                                                       \
+                auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);            \
+                auto lws_range = sycl::range<1>(lws);                                                                  \
+                                                                                                                       \
+                auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {                                          \
+                    auto sg = nd_it.get_sub_group();                                                                   \
+                    const auto max_sg_size = sg.get_max_local_range()[0];                                              \
+                    const size_t start =                                                                               \
+                        vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); \
+                                                                                                                       \
+                    if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size)                               \
+                    {                                                                                                  \
+                        using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>;                           \
+                        using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>;                           \
+                        using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>;                           \
+                                                                                                                       \
+                        sycl::vec<_DataType_output, vec_sz> res_vec;                                                   \
+                                                                                                                       \
+                        if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>)        \
+                        {                                                                                              \
+                            if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>)   \
+                            {                                                                                          \
+                                sycl::vec<_DataType_input1, vec_sz> x1 =                                               \
+                                    sg.load<vec_sz>(input1_ptrT(&input1_data[start]));                                 \
+                                sycl::vec<_DataType_input2, vec_sz> x2 =                                               \
+                                    sg.load<vec_sz>(input2_ptrT(&input2_data[start]));                                 \
+                                                                                                                       \
+                                res_vec = __vec_operation__;                                                           \
+                            }                                                                                          \
+                            else /* input types don't match result type, so explicit casting is required */            \
+                            {                                                                                          \
+                                sycl::vec<_DataType_output, vec_sz> x1 =                                               \
+                                    dpnp_vec_cast<_DataType_output, _DataType_input1, vec_sz>(                         \
+                                        sg.load<vec_sz>(input1_ptrT(&input1_data[start])));                            \
+                                sycl::vec<_DataType_output, vec_sz> x2 =                                               \
+                                    dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>(                         \
+                                        sg.load<vec_sz>(input2_ptrT(&input2_data[start])));                            \
+                                                                                                                       \
+                                res_vec = __vec_operation__;                                                           \
+                            }                                                                                          \
+                        }                                                                                              \
+                        else                                                                                           \
+                        {                                                                                              \
+                            sycl::vec<_DataType_input1, vec_sz> x1 =                                                   \
+                                sg.load<vec_sz>(input1_ptrT(&input1_data[start]));                                     \
+                            sycl::vec<_DataType_input2, vec_sz> x2 =                                                   \
+                                sg.load<vec_sz>(input2_ptrT(&input2_data[start]));                                     \
+                                                                                                                       \
+                            for (size_t k = 0; k < vec_sz; ++k)                                                        \
+                            {                                                                                          \
+                                const _DataType_output input1_elem = x1[k];                                            \
+                                const _DataType_output input2_elem = x2[k];                                            \
+                                res_vec[k] = __operation__;                                                            \
+                            }                                                                                          \
+                        }                                                                                              \
+                        sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);                                        \
+                    }                                                                                                  \
+                    else                                                                                               \
                     {                                                                                                  \
-                        const _DataType_output input1_elem = input1_data[i];                                           \
-                        const _DataType_output input2_elem = input2_data[i];                                           \
-                        result[i] = __operation1__;                                                                    \
+                        for (size_t k = start + sg.get_local_id()[0]; k < result_size; k += max_sg_size)               \
+                        {                                                                                              \
+                            const _DataType_output input1_elem = input1_data[k];                                       \
+                            const _DataType_output input2_elem = input2_data[k];                                       \
+                            result[k] = __operation__;                                                                 \
+                        }                                                                                              \
                     }                                                                                                  \
+                };                                                                                                     \
+                                                                                                                       \
+                auto kernel_func = [&](sycl::handler& cgh) {                                                           \
+                    cgh.parallel_for<                                                                                  \
+                        class __name__##_sg_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(             \
+                        sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);                            \
+                };                                                                                                     \
+                event = q.submit(kernel_func);                                                                         \
+            }                                                                                                          \
+            else /* either input1 or input2 has complex type */                                                        \
+            {                                                                                                          \
+                auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                           \
+                    const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                      \
+                                                                                                                       \
+                    const _DataType_output input1_elem = input1_data[i];                                               \
+                    const _DataType_output input2_elem = input2_data[i];                                               \
+                    result[i] = __operation__;                                                                         \
                 };                                                                                                     \
                 auto kernel_func = [&](sycl::handler& cgh) {                                                           \
                     cgh.parallel_for<class __name__##_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(   \
@@ -1013,18 +1109,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
             }                                                                                                          \
         }                                                                                                              \
                                                                                                                        \
-        input1_ptr.depends_on(event);                                                                                  \
-        input1_shape_ptr.depends_on(event);                                                                            \
-        input1_strides_ptr.depends_on(event);                                                                          \
-        input2_ptr.depends_on(event);                                                                                  \
-        input2_shape_ptr.depends_on(event);                                                                            \
-        input2_strides_ptr.depends_on(event);                                                                          \
-        result_ptr.depends_on(event);                                                                                  \
-        result_shape_ptr.depends_on(event);                                                                            \
-        result_strides_ptr.depends_on(event);                                                                          \
-                                                                                                                       \
         event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
-                                                                                                                       \
         return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
@@ -1048,26 +1133,25 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     {                                                                                                                  \
         DPCTLSyclQueueRef q_ref = reinterpret_cast<DPCTLSyclQueueRef>(&DPNP_QUEUE);                                    \
         DPCTLEventVectorRef dep_event_vec_ref = nullptr;                                                               \
-        DPCTLSyclEventRef event_ref = __name__<_DataType_output, _DataType_input1, _DataType_input2>(                  \
-            q_ref,                                                                                                     \
-            result_out,                                                                                                \
-            result_size,                                                                                               \
-            result_ndim,                                                                                               \
-            result_shape,                                                                                              \
-            result_strides,                                                                                            \
-            input1_in,                                                                                                 \
-            input1_size,                                                                                               \
-            input1_ndim,                                                                                               \
-            input1_shape,                                                                                              \
-            input1_strides,                                                                                            \
-            input2_in,                                                                                                 \
-            input2_size,                                                                                               \
-            input2_ndim,                                                                                               \
-            input2_shape,                                                                                              \
-            input2_strides,                                                                                            \
-            where,                                                                                                     \
-            dep_event_vec_ref                                                                                          \
-        );                                                                                                             \
+        DPCTLSyclEventRef event_ref =                                                                                  \
+            __name__<_DataType_output, _DataType_input1, _DataType_input2>(q_ref,                                      \
+                                                                           result_out,                                 \
+                                                                           result_size,                                \
+                                                                           result_ndim,                                \
+                                                                           result_shape,                               \
+                                                                           result_strides,                             \
+                                                                           input1_in,                                  \
+                                                                           input1_size,                                \
+                                                                           input1_ndim,                                \
+                                                                           input1_shape,                               \
+                                                                           input1_strides,                             \
+                                                                           input2_in,                                  \
+                                                                           input2_size,                                \
+                                                                           input2_ndim,                                \
+                                                                           input2_shape,                               \
+                                                                           input2_strides,                             \
+                                                                           where,                                      \
+                                                                           dep_event_vec_ref);                         \
         DPCTLEvent_WaitAndThrow(event_ref);                                                                            \
         DPCTLEvent_Delete(event_ref);                                                                                  \
     }                                                                                                                  \
@@ -1108,12 +1192,91 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                         const shape_elem_type*,                                                        \
                                         const shape_elem_type*,                                                        \
                                         const size_t*,                                                                 \
-                                        const DPCTLEventVectorRef) = __name__<_DataType_output,                        \
-                                                                              _DataType_input1,                        \
-                                                                              _DataType_input2>;
+                                        const DPCTLEventVectorRef) =                                                   \
+        __name__<_DataType_output, _DataType_input1, _DataType_input2>;
 
 #include <dpnp_gen_2arg_3type_tbl.hpp>
 
+template <DPNPFuncType FT1, DPNPFuncType FT2, typename has_fp64 = std::true_type>
+static constexpr DPNPFuncType get_divide_res_type()
+{ // Compile-time selection of the result DPNPFuncType registered for the DPNP_FN_DIVIDE_EXT entry of inputs FT1, FT2.
+    constexpr auto widest_type = populate_func_types<FT1, FT2>(); // presumably the wider of FT1/FT2 - TODO confirm
+    constexpr auto shortes_type = (widest_type == FT1) ? FT2 : FT1; // the other input (same as widest if FT1 == FT2)
+    // has_fp64::value gates the promotions to DOUBLE/CMPLX128 below that are not already forced by a DOUBLE input.
+    if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX128 || widest_type == DPNPFuncType::DPNP_FT_DOUBLE)
+    {
+        return widest_type; // the widest input type is used unchanged, regardless of has_fp64
+    }
+    else if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX64)
+    {
+        if constexpr (shortes_type == DPNPFuncType::DPNP_FT_DOUBLE) // not gated by has_fp64: an input is DOUBLE
+        {
+            return DPNPFuncType::DPNP_FT_CMPLX128;
+        }
+        else if constexpr (has_fp64::value &&
+                           (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG))
+        {
+            return DPNPFuncType::DPNP_FT_CMPLX128; // INT/LONG partner promotes CMPLX64 only when has_fp64 is true
+        }
+    } // any other partner: falls through to the final return, i.e. CMPLX64
+    else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT)
+    {
+        if constexpr (has_fp64::value &&
+                      (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG))
+        {
+            return DPNPFuncType::DPNP_FT_DOUBLE; // INT/LONG partner promotes FLOAT only when has_fp64 is true
+        }
+    } // any other partner: falls through to the final return, i.e. FLOAT
+    else if constexpr (has_fp64::value) // every widest_type not matched by the branches above
+    {
+        return DPNPFuncType::DPNP_FT_DOUBLE;
+    }
+    else
+    {
+        return DPNPFuncType::DPNP_FT_FLOAT; // same remaining types, but with has_fp64::value == false
+    }
+    return widest_type; // reached only from the CMPLX64 and FLOAT branches when no promotion was selected
+}
+
+template <DPNPFuncType FT1, DPNPFuncType... FTs>
+static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
+{ // For a fixed first input type FT1, fills fmap[...][FT1][T] for every T in FTs (one comma-fold per operation).
+    ((fmap[DPNPFuncName::DPNP_FN_ADD_EXT][FT1][FTs] = // entry = {result type enum, type-erased kernel pointer}
+          {populate_func_types<FT1, FTs>(),
+           (void*)dpnp_add_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>, // output type
+                                 func_type_map_t::find_type<FT1>, // first input type
+                                 func_type_map_t::find_type<FTs>>}), // second input type
+     ...);
+    ((fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][FT1][FTs] = // same layout as the ADD_EXT entries
+          {populate_func_types<FT1, FTs>(),
+           (void*)dpnp_multiply_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                      func_type_map_t::find_type<FT1>,
+                                      func_type_map_t::find_type<FTs>>}),
+     ...);
+    ((fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][FT1][FTs] = // same layout as the ADD_EXT entries
+          {populate_func_types<FT1, FTs>(),
+           (void*)dpnp_subtract_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                      func_type_map_t::find_type<FT1>,
+                                      func_type_map_t::find_type<FTs>>}),
+     ...);
+    ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] = // four initializers: two (result type, pointer) pairs
+          {get_divide_res_type<FT1, FTs>(), // first pair: result type with the default has_fp64 = std::true_type
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>,
+           get_divide_res_type<FT1, FTs, std::false_type>(), // second pair: result type with has_fp64 = std::false_type
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs, std::false_type>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>}), // NOTE(review): presumably the no-fp64 fallback - confirm
+     ...);
+}
+
+template <DPNPFuncType... FTs>
+static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap)
+{ // Registers the entries for every ordered pair of types taken from FTs.
+    ((func_map_elemwise_2arg_3type_core<FTs, FTs...>(fmap)), ...); // each FTs element in turn as FT1, full pack as second types
+}
+
 static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT,
@@ -1149,39 +1312,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ADD][eft_DBL][eft_DBL] = {eft_DBL,
                                                          (void*)dpnp_add_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_INT] = {eft_INT,
-                                                             (void*)dpnp_add_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_LNG] = {eft_LNG,
-                                                             (void*)dpnp_add_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_INT] = {eft_LNG,
-                                                             (void*)dpnp_add_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_LNG] = {eft_LNG,
-                                                             (void*)dpnp_add_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                             (void*)dpnp_add_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_INT] = {eft_DBL,
                                                              (void*)dpnp_arctan2_c_default<double, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_LNG] = {eft_DBL,
@@ -1347,39 +1477,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL,
                                                             (void*)dpnp_divide_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                                (void*)dpnp_divide_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT,
                                                           (void*)dpnp_fmod_c_default<int32_t, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = {eft_LNG,
@@ -1725,111 +1822,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C128] = {
         eft_C128, (void*)dpnp_multiply_c_default<std::complex<double>, std::complex<double>, std::complex<double>>};
 
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_BLN] = {
-        eft_BLN, (void*)dpnp_multiply_c_ext<bool, bool, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_INT] = {
-        eft_INT, (void*)dpnp_multiply_c_ext<int32_t, bool, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_LNG] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, bool, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, bool, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_DBL] = {
-        eft_DBL,  (void*)dpnp_multiply_c_ext<double, bool, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, bool, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, bool, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_BLN] = {
-        eft_INT, (void*)dpnp_multiply_c_ext<int32_t, int32_t, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_INT] = {
-        eft_INT, (void*)dpnp_multiply_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_LNG] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, int32_t, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, int32_t, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_BLN] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int64_t, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_INT] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_LNG] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, int64_t, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, int64_t, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_BLN] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_INT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_LNG] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, float, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, float, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_BLN] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_INT] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_LNG] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_FLT] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, double, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, double, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_BLN] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_INT] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_LNG] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_FLT] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_DBL] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C128] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<float>, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_BLN] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_INT] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_LNG] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_FLT] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_DBL] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C64] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C128] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, std::complex<double>>};
-
     fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_INT] = {eft_INT,
                                                            (void*)dpnp_power_c_default<int32_t, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_LNG] = {eft_LNG,
@@ -1929,38 +1921,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_DBL][eft_DBL] = {
         eft_DBL, (void*)dpnp_subtract_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_INT] = {
-        eft_INT, (void*)dpnp_subtract_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_LNG] = {
-        eft_LNG, (void*)dpnp_subtract_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_FLT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_INT] = {
-        eft_LNG, (void*)dpnp_subtract_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_LNG] = {
-        eft_LNG, (void*)dpnp_subtract_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_FLT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_INT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_LNG] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_FLT] = {
-        eft_FLT, (void*)dpnp_subtract_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_INT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_LNG] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_FLT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, double>};
+    func_map_elemwise_2arg_3type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL, eft_C64, eft_C128>(fmap);
 
     return;
 }
diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp
index 3d39f2f373c7..b3f9716d73f1 100644
--- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -182,7 +182,10 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref,
                                                   size_t inverse,
                                                   const size_t norm)
 {
+    // avoid warning unused variable
     (void)result_shape;
+    (void)input_size;
+    (void)result_size;
 
     if (!shape_size) {
         return;
@@ -253,6 +256,9 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
                                                               const size_t norm,
                                                               const size_t real)
 {
+    // avoid warning unused variable
+    (void)input_size;
+
     DPCTLSyclEventRef event_ref = nullptr;
     if (!shape_size) {
         return event_ref;
diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
index 5cde013b69f8..0b80ac678d34 100644
--- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -896,6 +896,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref,
                               const DPCTLEventVectorRef dep_event_vec_ref)
 {
     // avoid warning unused variable
+    (void)array1_size;
     (void)dep_event_vec_ref;
 
     DPCTLSyclEventRef event_ref = nullptr;
diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index 109246913589..157347aa90c0 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
 
 #include "dpnp_fptr.hpp"
 #include "dpnp_iface.hpp"
+#include "dpnp_iterator.hpp"
 #include "dpnpc_memory_adapter.hpp"
 #include "queue_sycl.hpp"
 
@@ -286,6 +287,457 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
                                     const size_t,
                                     const DPCTLEventVectorRef) = dpnp_any_c<_DataType, _ResultType>;
 
+
+#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__)                                                             \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_kernel;                                                                                           \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_broadcast_kernel;                                                                                 \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_strides_kernel;                                                                                   \
+    /* Unary logic op launcher: each bool result element is __operation__ evaluated on input1_elem. */                 \
+    template <typename _DataType_input1>                                                                               \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                \
+                               void* result_out,                                                                       \
+                               const size_t result_size,                                                               \
+                               const size_t result_ndim,                                                               \
+                               const shape_elem_type* result_shape,                                                    \
+                               const shape_elem_type* result_strides,                                                  \
+                               const void* input1_in,                                                                  \
+                               const size_t input1_size,                                                               \
+                               const size_t input1_ndim,                                                               \
+                               const shape_elem_type* input1_shape,                                                    \
+                               const shape_elem_type* input1_strides,                                                  \
+                               const size_t* where,                                                                    \
+                               const DPCTLEventVectorRef dep_event_vec_ref)                                            \
+    {                                                                                                                  \
+        /* avoid warning unused variable*/                                                                             \
+        (void)result_shape;                                                                                            \
+        (void)where;                                                                                                   \
+        (void)dep_event_vec_ref;                                                                                       \
+                                                                                                                       \
+        DPCTLSyclEventRef event_ref = nullptr;                                                                         \
+                                                                                                                       \
+        if (!input1_size)                                                                                              \
+        {                                                                                                              \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+                                                                                                                       \
+        sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));                                                     \
+                                                                                                                       \
+        _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast<void *>(input1_in));                \
+        bool* result = static_cast<bool *>(result_out);                                                                \
+                                                                                                                       \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
+                                                                                                                       \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
+                                                                                                                       \
+        if (use_strides)                                                                                               \
+        {                                                                                                              \
+            if (result_ndim != input1_ndim)                                                                            \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with input1 ndim=" + std::to_string(input1_ndim));               \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization, use USM-host for temporary speeds up transfer to device */                \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 2 * result_ndim;                                                                     \
+            shape_elem_type *dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary for packed strides in a USM-host backed vector */                                 \
+            auto strides_host_packed = std::vector<shape_elem_type, usm_host_allocatorT>(strides_size,                 \
+                                                                                         usm_host_allocatorT(q));      \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides and input1_strides */                                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+                                                                                                                       \
+            auto copy_strides_ev = q.copy<shape_elem_type>(strides_host_packed.data(),                                 \
+                                                           dev_strides_data,                                           \
+                                                           strides_host_packed.size());                                \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
+                {                                                                                                      \
+                    const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim];                       \
+                                                                                                                       \
+                    size_t input1_id = 0;                                                                              \
+                                                                                                                       \
+                    for (size_t i = 0; i < result_ndim; ++i)                                                           \
+                    {                                                                                                  \
+                        const size_t output_xyz_id =                                                                   \
+                            get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i);                 \
+                        input1_id += output_xyz_id * input1_strides_data[i];                                           \
+                    }                                                                                                  \
+                                                                                                                       \
+                    const _DataType_input1 input1_elem = input1_data[input1_id];                                       \
+                    result[output_id] = __operation__;                                                                 \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
+                cgh.parallel_for<class __name__##_strides_kernel<_DataType_input1>>(                                   \
+                    sycl::range<1>(result_size), kernel_parallel_for_func);                                            \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+            /* kernel finished (wait above): strides buffer can be freed, null event is returned */                    \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else                                                                                                           \
+        {                                                                                                              \
+            constexpr size_t lws = 64;                                                                                 \
+            constexpr unsigned int vec_sz = 8;                                                                         \
+            constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;            \
+                                                                                                                       \
+            auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);                \
+            auto lws_range = sycl::range<1>(lws);                                                                      \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {                                              \
+                auto sg = nd_it.get_sub_group();                                                                       \
+                const auto max_sg_size = sg.get_max_local_range()[0];                                                  \
+                const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +                         \
+                                               sg.get_group_id()[0] * max_sg_size);                                    \
+                                                                                                                       \
+                if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) {                                 \
+                    sycl::vec<_DataType_input1, vec_sz> x1 =                                                           \
+                        sg.load<vec_sz>(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start]));         \
+                    sycl::vec<bool, vec_sz> res_vec;                                                                   \
+                                                                                                                       \
+                    for (size_t k = 0; k < vec_sz; ++k) {                                                              \
+                        const _DataType_input1 input1_elem = x1[k];                                                    \
+                        res_vec[k] = __operation__;                                                                    \
+                    }                                                                                                  \
+                    sg.store<vec_sz>(sycl::multi_ptr<bool, global_space>(&result[start]), res_vec);                    \
+                                                                                                                       \
+                }                                                                                                      \
+                else {                                                                                                 \
+                    for (size_t k = start; k < result_size; ++k) {                                                     \
+                        const _DataType_input1 input1_elem = input1_data[k];                                           \
+                        result[k] = __operation__;                                                                     \
+                    }                                                                                                  \
+                }                                                                                                      \
+            };                                                                                                         \
+                                                                                                                       \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<class __name__##_kernel<_DataType_input1>>(                                           \
+                    sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);                                \
+            };                                                                                                         \
+            sycl::event event = q.submit(kernel_func);                                                                 \
+            /* `event` is a local object, so the caller receives a copy of it */                                       \
+            event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                   \
+            return DPCTLEvent_Copy(event_ref);                                                                         \
+        }                                                                                                              \
+        return event_ref;                                                                                              \
+    }                                                                                                                  \
+                                                                                                                       \
+    template <typename _DataType_input1>                                                                               \
+    DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef,                                                             \
+                                        void*,                                                                         \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const void*,                                                                   \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const size_t*,                                                                 \
+                                        const DPCTLEventVectorRef) = __name__<_DataType_input1>;
+
+#include <dpnp_gen_1arg_1type_tbl.hpp>
+// Registers dpnp_logical_not_c_ext<T> (tagged eft_BLN) in fmap for each type tag of FTs, with T = find_type<FT>.
+template <DPNPFuncType ... FTs>
+static void func_map_logic_1arg_1type_helper(func_map_t& fmap)
+{
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_NOT_EXT][FTs][FTs] =
+        {eft_BLN, (void*)dpnp_logical_not_c_ext<func_type_map_t::find_type<FTs>>}), ...);
+}
+
+
+#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__)                                                            \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2>                                                                     \
+    class __name__##_kernel;                                                                                           \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2>                                                                     \
+    class __name__##_broadcast_kernel;                                                                                 \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2>                                                                     \
+    class __name__##_strides_kernel;                                                                                   \
+                                                                                                                       \
+    template <typename _DataType_input1, typename _DataType_input2>                                                    \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                \
+                               void* result_out,                                                                       \
+                               const size_t result_size,                                                               \
+                               const size_t result_ndim,                                                               \
+                               const shape_elem_type* result_shape,                                                    \
+                               const shape_elem_type* result_strides,                                                  \
+                               const void* input1_in,                                                                  \
+                               const size_t input1_size,                                                               \
+                               const size_t input1_ndim,                                                               \
+                               const shape_elem_type* input1_shape,                                                    \
+                               const shape_elem_type* input1_strides,                                                  \
+                               const void* input2_in,                                                                  \
+                               const size_t input2_size,                                                               \
+                               const size_t input2_ndim,                                                               \
+                               const shape_elem_type* input2_shape,                                                    \
+                               const shape_elem_type* input2_strides,                                                  \
+                               const size_t* where,                                                                    \
+                               const DPCTLEventVectorRef dep_event_vec_ref)                                            \
+    {                                                                                                                  \
+        /* avoid warning unused variable*/                                                                             \
+        (void)where;                                                                                                   \
+        (void)dep_event_vec_ref;                                                                                       \
+                                                                                                                       \
+        DPCTLSyclEventRef event_ref = nullptr;                                                                         \
+                                                                                                                       \
+        if (!input1_size || !input2_size)                                                                              \
+        {                                                                                                              \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+                                                                                                                       \
+        sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));                                                     \
+                                                                                                                       \
+        _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast<void *>(input1_in));                \
+        _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast<void *>(input2_in));                \
+        bool* result = static_cast<bool *>(result_out);                                                                \
+                                                                                                                       \
+        bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);                    \
+                                                                                                                       \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
+                                                                                                                       \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
+                                                                                                                       \
+        shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim];                                      \
+                                                                                                                       \
+        get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets);                                   \
+        use_strides =                                                                                                  \
+            use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim);               \
+        delete[] input2_shape_offsets;                                                                                 \
+                                                                                                                       \
+        sycl::event event;                                                                                             \
+        sycl::range<1> gws(result_size); /* used only when use_broadcasting or use_strides is true */                  \
+                                                                                                                       \
+        if (use_broadcasting)                                                                                          \
+        {                                                                                                              \
+            DPNPC_id<_DataType_input1>* input1_it;                                                                     \
+            const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);                                 \
+            input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref,                       \
+                                                                                          input1_it_size_in_bytes));   \
+            new (input1_it)                                                                                            \
+                DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim);             \
+                                                                                                                       \
+            input1_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            DPNPC_id<_DataType_input2>* input2_it;                                                                     \
+            const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);                                 \
+            input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref,                       \
+                                                                                          input2_it_size_in_bytes));   \
+            new (input2_it)                                                                                            \
+                DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim);             \
+                                                                                                                       \
+            input2_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                          \
+                {                                                                                                      \
+                    const _DataType_input1 input1_elem = (*input1_it)[i];                                              \
+                    const _DataType_input2 input2_elem = (*input2_it)[i];                                              \
+                    result[i] = __operation__;                                                                         \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<                                                                                      \
+                    class __name__##_broadcast_kernel<_DataType_input1, _DataType_input2>>(                            \
+                    gws, kernel_parallel_for_func);                                                                    \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            input1_it->~DPNPC_id();                                                                                    \
+            input2_it->~DPNPC_id();                                                                                    \
+                                                                                                                       \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else if (use_strides)                                                                                          \
+        {                                                                                                              \
+            if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with either input1 ndim=" + std::to_string(input1_ndim) +        \
+                                         " or input2 ndim=" + std::to_string(input2_ndim));                            \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization: a USM-host temporary speeds up transfer to the device */                  \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 3 * result_ndim;                                                                     \
+            shape_elem_type *dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary for packed strides, owned by a USM-host std::vector */                            \
+            auto strides_host_packed = std::vector<shape_elem_type, usm_host_allocatorT>(strides_size,                 \
+                                                                                         usm_host_allocatorT(q));      \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides, input1_strides and input2_strides */                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+            std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);    \
+                                                                                                                       \
+            auto copy_strides_ev = q.copy<shape_elem_type>(strides_host_packed.data(),                                 \
+                                                           dev_strides_data,                                           \
+                                                           strides_host_packed.size());                                \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
+                { /* dev_strides_data = [result | input1 | input2] strides, result_ndim elements each */               \
+                    const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
+                                                                                                                       \
+                    size_t input1_id = 0;                                                                              \
+                    size_t input2_id = 0;                                                                              \
+                                                                                                                       \
+                    for (size_t i = 0; i < result_ndim; ++i)                                                           \
+                    {                                                                                                  \
+                        const size_t output_xyz_id =                                                                   \
+                            get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i);                 \
+                        input1_id += output_xyz_id * input1_strides_data[i];                                           \
+                        input2_id += output_xyz_id * input2_strides_data[i];                                           \
+                    }                                                                                                  \
+                                                                                                                       \
+                    const _DataType_input1 input1_elem = input1_data[input1_id];                                       \
+                    const _DataType_input2 input2_elem = input2_data[input2_id];                                       \
+                    result[output_id] = __operation__;                                                                 \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
+                cgh.parallel_for<                                                                                      \
+                    class __name__##_strides_kernel<_DataType_input1, _DataType_input2>>(                              \
+                    gws, kernel_parallel_for_func);                                                                    \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else                                                                                                           \
+        {                                                                                                              \
+            constexpr size_t lws = 64;                                                                                 \
+            constexpr unsigned int vec_sz = 8;                                                                         \
+            constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;            \
+                                                                                                                       \
+            auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);                \
+            auto lws_range = sycl::range<1>(lws);                                                                      \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {                                              \
+                auto sg = nd_it.get_sub_group();                                                                       \
+                const auto max_sg_size = sg.get_max_local_range()[0];                                                  \
+                const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +                         \
+                                               sg.get_group_id()[0] * max_sg_size);                                    \
+                                                                                                                       \
+                if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) {                                 \
+                    sycl::vec<_DataType_input1, vec_sz> x1 =                                                           \
+                        sg.load<vec_sz>(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start]));         \
+                    sycl::vec<_DataType_input2, vec_sz> x2 =                                                           \
+                        sg.load<vec_sz>(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start]));         \
+                    sycl::vec<bool, vec_sz> res_vec;                                                                   \
+                                                                                                                       \
+                    for (size_t k = 0; k < vec_sz; ++k) {                                                              \
+                        const _DataType_input1 input1_elem = x1[k];                                                    \
+                        const _DataType_input2 input2_elem = x2[k];                                                    \
+                        res_vec[k] = __operation__;                                                                    \
+                    }                                                                                                  \
+                    sg.store<vec_sz>(sycl::multi_ptr<bool, global_space>(&result[start]), res_vec);                    \
+                                                                                                                       \
+                }                                                                                                      \
+                else {                                                                                                 \
+                    for (size_t k = start; k < result_size; ++k) {                                                     \
+                        const _DataType_input1 input1_elem = input1_data[k];                                           \
+                        const _DataType_input2 input2_elem = input2_data[k];                                           \
+                        result[k] = __operation__;                                                                     \
+                    }                                                                                                  \
+                }                                                                                                      \
+            };                                                                                                         \
+                                                                                                                       \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<class __name__##_kernel<_DataType_input1, _DataType_input2>>(                         \
+                    sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);                                \
+            };                                                                                                         \
+            event = q.submit(kernel_func);                                                                             \
+        }                                                                                                              \
+                                                                                                                       \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
+        return DPCTLEvent_Copy(event_ref);                                                                             \
+    }                                                                                                                  \
+                                                                                                                       \
+    template <typename _DataType_input1, typename _DataType_input2>                                                    \
+    DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef,                                                             \
+                                        void*,                                                                         \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const void*,                                                                   \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const void*,                                                                   \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const size_t*,                                                                 \
+                                        const DPCTLEventVectorRef) = __name__<_DataType_input1,                        \
+                                                                              _DataType_input2>;
+
+#include <dpnp_gen_2arg_2type_tbl.hpp>
+
+template <DPNPFuncType FT1, DPNPFuncType ... FTs>
+static void func_map_logic_2arg_2type_core(func_map_t& fmap)
+{ // For a fixed FT1, registers each *_EXT comparison/logical kernel for every (FT1, FTs) type pair; the result type is always eft_BLN.
+    ((fmap[DPNPFuncName::DPNP_FN_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_GREATER_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_greater_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_GREATER_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_greater_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LESS_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_less_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_less_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_AND_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_logical_and_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_OR_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_logical_or_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_XOR_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_logical_xor_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_NOT_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_not_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+}
+
+template <DPNPFuncType ... FTs>
+static void func_map_logic_2arg_2type_helper(func_map_t& fmap)
+{ // Calls func_map_logic_2arg_2type_core<FT, FTs...> once per FT in FTs, i.e. for every ordered pair of types taken from FTs.
+    ((func_map_logic_2arg_2type_core<FTs, FTs...>(fmap)), ...);
+}
+
 void func_map_init_logic(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ALL][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_all_default_c<bool, bool>};
@@ -378,5 +830,8 @@ void func_map_init_logic(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c<float, bool>};
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c<double, bool>};
 
+    func_map_logic_1arg_1type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
+    func_map_logic_2arg_2type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
+
     return;
 }
diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp
index 4411e207003d..568db448d966 100644
--- a/dpnp/backend/kernels/dpnp_krnl_random.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -37,6 +37,9 @@
 #include "queue_sycl.hpp"
 #include "dpnp_random_state.hpp"
 
+static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_VERSION_REQUIRED,
+              "MKL does not meet minimum version requirement");
+
 namespace mkl_blas = oneapi::mkl::blas;
 namespace mkl_rng = oneapi::mkl::rng;
 namespace mkl_vm = oneapi::mkl::vm;
@@ -990,11 +993,7 @@ DPCTLSyclEventRef dpnp_rng_multinomial_c(DPCTLSyclQueueRef q_ref,
             DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result, size, true, true);
             _DataType* result1 = result_ptr.get_ptr();
 
-#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER)
-            std::vector<double> p(p_data, p_data + p_size);
-#else
             auto p = sycl::span<double>{p_data, p_size};
-#endif
             mkl_rng::multinomial<_DataType> distribution(ntrial, p);
 
             // perform generation
@@ -1082,13 +1081,8 @@ DPCTLSyclEventRef dpnp_rng_multivariate_normal_c(DPCTLSyclQueueRef q_ref,
 
     _DataType* result1 = static_cast<_DataType *>(result);
 
-#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER)
-    std::vector<double> mean(mean_data, mean_data + mean_size);
-    std::vector<double> cov(cov_data, cov_data + cov_size);
-#else
     auto mean = sycl::span<double>{mean_data, mean_size};
     auto cov = sycl::span<double>{cov_data, cov_size};
-#endif
 
     // `result` is a array for random numbers
     // `size` is a `result`'s len.
diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp
index 5b10bc71a8be..742e6dff3783 100644
--- a/dpnp/backend/src/dpnp_fptr.hpp
+++ b/dpnp/backend/src/dpnp_fptr.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,9 @@
 #define BACKEND_FPTR_H
 
 #include <map>
+#include <complex>
+
+#include <CL/sycl.hpp>
 
 #include <dpnp_iface_fptr.hpp>
 
@@ -64,6 +67,120 @@ const DPNPFuncType eft_C64 = DPNPFuncType::DPNP_FT_CMPLX64;
 const DPNPFuncType eft_C128 = DPNPFuncType::DPNP_FT_CMPLX128;
 const DPNPFuncType eft_BLN = DPNPFuncType::DPNP_FT_BOOL;
 
+/**
+ * An internal structure that pairs a Data type enum value (FuncType) with a C++ type (T)
+ */
+template <DPNPFuncType FuncType, typename T>
+struct func_type_pair_t
+{
+   using type = T; // the C++ type bound to FuncType
+
+   static func_type_pair_t get_pair(std::integral_constant<DPNPFuncType, FuncType>) { return {}; } // overload keyed on the enum value
+};
+
+/**
+ * An internal structure that builds a map from Data type enum values to C++ types by inheriting from every pair in Ps
+ */
+template <typename ... Ps>
+struct func_type_map_factory_t : public Ps...
+{
+   using Ps::get_pair...; // bring the get_pair() overload of every base pair into one overload set
+
+   template <DPNPFuncType FuncType>
+   using find_type = typename decltype(get_pair(std::integral_constant<DPNPFuncType, FuncType>{}))::type; // `type` of the pair whose overload accepts FuncType
+};
+
+/**
+ * A map of the FPTR interface that links each Data type enum value with its associated C++ type
+ */
+typedef func_type_map_factory_t<func_type_pair_t<eft_BLN, bool>,
+                                func_type_pair_t<eft_INT, std::int32_t>,
+                                func_type_pair_t<eft_LNG, std::int64_t>,
+                                func_type_pair_t<eft_FLT, float>,
+                                func_type_pair_t<eft_DBL, double>,
+                                func_type_pair_t<eft_C64, std::complex<float>>,
+                                func_type_pair_t<eft_C128, std::complex<double>>> func_type_map_t;
+
+/**
+ * Return the result type enum value for two input types: the greater of FT1 and FT2 in enum order; throws std::runtime_error if either is DPNP_FT_NONE.
+ */
+template <DPNPFuncType FT1, DPNPFuncType FT2>
+static constexpr DPNPFuncType populate_func_types()
+{
+    if constexpr (FT1 == DPNPFuncType::DPNP_FT_NONE)
+    {
+        throw std::runtime_error("Templated enum value of FT1 is None");
+    }
+    else if constexpr (FT2 == DPNPFuncType::DPNP_FT_NONE)
+    {
+        throw std::runtime_error("Templated enum value of FT2 is None");
+    }
+    return (FT1 < FT2) ? FT2 : FT1; // the greater enum value is the result type
+}
+
+/**
+ * @brief A helper function that builds an object of type Op from the elements v[I]... of vector v, one element per index of the sequence.
+ */
+template <typename Op, typename Vec, std::size_t... I>
+static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence<I...>)
+{
+    return Op{v[I]...};
+}
+
+/**
+ * @brief A casting function for SYCL vector.
+ *
+ * @tparam dstT An element type of the resulting vector.
+ * @tparam srcT An element type of the incoming vector.
+ * @tparam N A number of elements in the vector.
+ * @tparam Indices A sequence of integers used to index the elements; defaults to std::make_index_sequence<N>.
+ * @param s An incoming SYCL vector to cast.
+ * @return SYCL vector of N elements cast to the destination type dstT.
+ */
+template <typename dstT, typename srcT, std::size_t N, typename Indices = std::make_index_sequence<N>>
+static auto dpnp_vec_cast(const sycl::vec<srcT, N>& s)
+{
+    return dpnp_vec_cast_impl<sycl::vec<dstT, N>, sycl::vec<srcT, N>>(s, Indices{});
+}
+
+/**
+ * Expands to its arguments unchanged, which removes the parentheses around a passed list of types separated by comma.
+ * It's intended to be used in operations macros.
+ */
+#define MACRO_UNPACK_TYPES(...) __VA_ARGS__
+
+/**
+ * Implements std::is_same<> with variadic number of types to compare with:
+ * true when type T matches at least one of types Ts.
+ */
+template <typename T, typename... Ts>
+struct is_any : std::disjunction<std::is_same<T, Ts>...> {};
+
+/**
+ * Implements std::is_same<> with variadic number of types to compare with:
+ * true when type T matches every type from Ts sequence.
+ */
+template <typename T, typename... Ts>
+struct are_same : std::conjunction<std::is_same<T, Ts>...> {};
+
+/**
+ * A template constant to check that T1 and T2 are the same type and that this type matches any type from Ts sequence.
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr auto both_types_are_same = std::conjunction_v<is_any<T1, Ts...>, are_same<T1, T2>>;
+
+/**
+ * A template constant to check that each of types T1 and T2 matches any type from Ts (not necessarily the same one).
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr auto both_types_are_any_of = std::conjunction_v<is_any<T1, Ts...>, is_any<T2, Ts...>>;
+
+/**
+ * A template constant to check that neither T1 nor T2 matches any type from Ts sequence.
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr auto none_of_both_types = !std::disjunction_v<is_any<T1, Ts...>, is_any<T2, Ts...>>;
+
 /**
  * FPTR interface initialization functions
  */
diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp
index 33f4d750067f..985d5a61494e 100644
--- a/dpnp/backend/src/dpnp_utils.hpp
+++ b/dpnp/backend/src/dpnp_utils.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -45,15 +45,15 @@
  * Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and
  * 20221101L on Windows.
  */
-#ifndef __SYCL_COMPILER_2023_SWITCHOVER
-#define __SYCL_COMPILER_2023_SWITCHOVER 20221102L
+#ifndef __SYCL_COMPILER_VERSION_REQUIRED
+#define __SYCL_COMPILER_VERSION_REQUIRED 20221102L
 #endif
 
 /**
  * Version of Intel MKL at which transition to OneMKL release 2023.0.0 occurs.
  */
-#ifndef __INTEL_MKL_2023_SWITCHOVER
-#define __INTEL_MKL_2023_SWITCHOVER 20230000
+#ifndef __INTEL_MKL_2023_VERSION_REQUIRED
+#define __INTEL_MKL_2023_VERSION_REQUIRED 20230000
 #endif
 
 /**
diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp
index dab09622a698..6c81f5267787 100644
--- a/dpnp/backend/src/dpnpc_memory_adapter.hpp
+++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -85,10 +85,6 @@ class DPNPC_ptr_adapter final
             std::cerr << "\n\t size_in_bytes=" << size_in_bytes;
             std::cerr << "\n\t pointer type=" << (long)src_ptr_type;
             std::cerr << "\n\t queue inorder=" << queue.is_in_order();
-#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER)
-            std::cerr << "\n\t queue is_host=" << queue.is_host();
-            std::cerr << "\n\t queue device is_host=" << queue.get_device().is_host();
-#endif
             std::cerr << "\n\t queue device is_cpu=" << queue.get_device().is_cpu();
             std::cerr << "\n\t queue device is_gpu=" << queue.get_device().is_gpu();
             std::cerr << "\n\t queue device is_accelerator=" << queue.get_device().is_accelerator();
diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp
index 0810ed0aaba8..55f78230d64e 100644
--- a/dpnp/backend/src/queue_sycl.cpp
+++ b/dpnp/backend/src/queue_sycl.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -215,11 +215,6 @@ bool backend_sycl::backend_sycl_is_cpu()
     if (qptr.get_device().is_cpu()) {
         return true;
     }
-#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER)
-    else if (qptr.is_host() || qptr.get_device().is_host()) {
-        return true;
-    }
-#endif
 
     return false;
 }
diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp
index af03e1b6f121..8683fdd5737d 100644
--- a/dpnp/backend/src/queue_sycl.hpp
+++ b/dpnp/backend/src/queue_sycl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -113,7 +113,7 @@ class backend_sycl
     static void backend_sycl_queue_init(QueueOptions selector = QueueOptions::CPU_SELECTOR);
 
     /**
-     * Return True if current @ref queue is related to cpu or host device
+     * Return True if current @ref queue is related to cpu device
      */
     static bool backend_sycl_is_cpu();
 
@@ -137,6 +137,13 @@ class backend_sycl
 #else
         // temporal solution. Started from Sept-2020
         DPCTLSyclQueueRef DPCtrl_queue = DPCTLQueueMgr_GetCurrentQueue();
+        if (DPCtrl_queue == nullptr)
+        {
+            std::string reason = (DPCTLQueueMgr_GetQueueStackSize() == static_cast<size_t>(-1))
+                                     ? ": the queue stack is empty, probably no device is available."
+                                     : ".";
+            throw std::runtime_error("Failed to create a copy of SYCL queue with default device" + reason);
+        }
         return *(reinterpret_cast<sycl::queue*>(DPCtrl_queue));
 #endif
     }
diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx
index 859bf49d59a8..dffbf6f65d15 100644
--- a/dpnp/dparray.pyx
+++ b/dpnp/dparray.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -462,7 +462,7 @@ cdef class dparray:
             return ( < long * > self._dparray_data)[lin_idx]
         elif self.dtype == numpy.int32:
             return ( < int * > self._dparray_data)[lin_idx]
-        elif self.dtype == numpy.bool:
+        elif self.dtype == numpy.bool_:
             return ( < cpp_bool * > self._dparray_data)[lin_idx]
         elif self.dtype == numpy.complex128:
             return ( < double complex * > self._dparray_data)[lin_idx]
@@ -489,7 +489,7 @@ cdef class dparray:
             ( < long * > self._dparray_data)[lin_idx] = <long > value
         elif self.dtype == numpy.int32:
             ( < int * > self._dparray_data)[lin_idx] = <int > value
-        elif self.dtype == numpy.bool:
+        elif self.dtype == numpy.bool_:
             ( < cpp_bool * > self._dparray_data)[lin_idx] = < cpp_bool > value
         elif self.dtype == numpy.complex64:
             ( < float complex * > self._dparray_data)[lin_idx] = <float complex > value
@@ -876,7 +876,7 @@ cdef class dparray:
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return conjugate(self)
@@ -889,7 +889,7 @@ cdef class dparray:
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return conjugate(self)
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index e0c82b6125ce..9bf161b0aaf7 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -127,6 +127,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_EIG_EXT
         DPNP_FN_EIGVALS
         DPNP_FN_EIGVALS_EXT
+        DPNP_FN_EQUAL_EXT
         DPNP_FN_ERF
         DPNP_FN_ERF_EXT
         DPNP_FN_EYE
@@ -155,6 +156,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_FMOD_EXT
         DPNP_FN_FULL
         DPNP_FN_FULL_LIKE
+        DPNP_FN_GREATER_EXT
+        DPNP_FN_GREATER_EQUAL_EXT
         DPNP_FN_HYPOT
         DPNP_FN_HYPOT_EXT
         DPNP_FN_IDENTITY
@@ -169,6 +172,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_KRON_EXT
         DPNP_FN_LEFT_SHIFT
         DPNP_FN_LEFT_SHIFT_EXT
+        DPNP_FN_LESS_EXT
+        DPNP_FN_LESS_EQUAL_EXT
         DPNP_FN_LOG
         DPNP_FN_LOG_EXT
         DPNP_FN_LOG10
@@ -177,6 +182,10 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_LOG1P_EXT
         DPNP_FN_LOG2
         DPNP_FN_LOG2_EXT
+        DPNP_FN_LOGICAL_AND_EXT
+        DPNP_FN_LOGICAL_NOT_EXT
+        DPNP_FN_LOGICAL_OR_EXT
+        DPNP_FN_LOGICAL_XOR_EXT
         DPNP_FN_MATMUL
         DPNP_FN_MATMUL_EXT
         DPNP_FN_MATRIX_RANK
@@ -203,6 +212,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_NEGATIVE_EXT
         DPNP_FN_NONZERO
         DPNP_FN_NONZERO_EXT
+        DPNP_FN_NOT_EQUAL_EXT
         DPNP_FN_ONES
         DPNP_FN_ONES_LIKE
         DPNP_FN_PARTITION
@@ -364,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp":
     struct DPNPFuncData:
         DPNPFuncType return_type
         void * ptr
+        DPNPFuncType return_type_no_fp64
+        void *ptr_no_fp64
 
     DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except +
 
@@ -379,7 +391,7 @@ cdef extern from "constants.hpp":
 
 cdef extern from "dpnp_iface.hpp":
     void dpnp_queue_initialize_c(QueueOptions selector)
-    size_t dpnp_queue_is_cpu_c()
+    size_t dpnp_queue_is_cpu_c() except +
 
     char * dpnp_memory_alloc_c(size_t size_in_bytes) except +
     void dpnp_memory_free_c(void * ptr)
@@ -429,7 +441,7 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu
                                                              const shape_elem_type * ,
                                                              const shape_elem_type * ,
                                                              const long * ,
-                                                             const c_dpctl.DPCTLEventVectorRef)
+                                                             const c_dpctl.DPCTLEventVectorRef) except +
 ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t)
 ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef,
                                                         void *,
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 41f0c0c01026..f12707ccc761 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -221,7 +221,7 @@ cpdef dpnp_queue_initialize():
 
 
 cpdef dpnp_queue_is_cpu():
-    """Return 1 if current queue is CPU or HOST. Return 0 otherwise.
+    """Return 1 if current queue is CPU. Return 0 otherwise.
 
     """
     return dpnp_queue_is_cpu_c()
@@ -276,7 +276,7 @@ cdef dpnp_DPNPFuncType_to_dtype(size_t type):
     elif type == <size_t > DPNP_FT_CMPLX128:
         return numpy.complex128
     elif type == <size_t > DPNP_FT_BOOL:
-        return numpy.bool
+        return numpy.bool_
     else:
         utils.checker_throw_type_error("dpnp_DPNPFuncType_to_dtype", type)
 
@@ -481,8 +481,6 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, x1_c_type, x2_c_type)
 
-    result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
-
     # Create result array
     cdef shape_type_c x1_shape = x1_obj.shape
 
@@ -495,15 +493,26 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
     result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj)
 
+    # get FPTR function and return type
+    cdef fptr_2in_1out_strides_t func = NULL
+    cdef DPNPFuncType return_type = DPNP_FT_NONE
+    if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64:
+        return_type = kernel_data.return_type
+        func = < fptr_2in_1out_strides_t > kernel_data.ptr
+    else:
+        return_type = kernel_data.return_type_no_fp64
+        func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64
+
     if out is None:
         """ Create result array with type given by FPTR data """
         result = utils.create_output_descriptor(result_shape,
-                                                kernel_data.return_type,
+                                                return_type,
                                                 None,
                                                 device=result_sycl_device,
                                                 usm_type=result_usm_type,
                                                 sycl_queue=result_sycl_queue)
     else:
+        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
         if out.dtype != result_type:
             utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
         if out.shape != result_shape:
@@ -517,11 +526,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
     result_obj = result.get_array()
 
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_obj.sycl_queue
+    cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue
     cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     """ Call FPTR function """
-    cdef fptr_2in_1out_strides_t func = <fptr_2in_1out_strides_t > kernel_data.ptr
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
                                                     result.get_data(),
                                                     result.size,
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
index c1c24a27747b..cb44a08db598 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,6 @@ and the rest of the library
 __all__ += [
     "dpnp_copy",
     "dpnp_diag",
-    "dpnp_eye",
     "dpnp_geomspace",
     "dpnp_identity",
     "dpnp_linspace",
@@ -46,8 +45,6 @@ __all__ += [
     "dpnp_ptp",
     "dpnp_trace",
     "dpnp_tri",
-    "dpnp_tril",
-    "dpnp_triu",
     "dpnp_vander",
 ]
 
@@ -84,9 +81,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_1out_func_ptr_t)(c_dpctl.DPC
                                                                      const size_t ,
                                                                      const int,
                                                                      const c_dpctl.DPCTLEventVectorRef) except +
-ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef,
-                                                     void *, int , const shape_elem_type * ,
-                                                     const c_dpctl.DPCTLEventVectorRef)
 ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_trace_t)(c_dpctl.DPCTLSyclQueueRef,
                                                        const void *,
                                                        void * ,
@@ -146,36 +140,6 @@ cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None):
-    if dtype is None:
-        dtype = dpnp.float64
-
-    if M is None:
-        M = N
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
-
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE_EXT, param1_type, param1_type)
-
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor((N, M), kernel_data.return_type, None)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef fptr_dpnp_eye_t func = <fptr_dpnp_eye_t > kernel_data.ptr
-
-    cdef shape_type_c result_shape = result.shape
-
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL)
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
 cpdef utils.dpnp_descriptor dpnp_geomspace(start, stop, num, endpoint, dtype, axis):
     cdef shape_type_c obj_shape = utils._object_to_tuple(num)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(obj_shape, dtype, None)
@@ -434,7 +398,7 @@ cpdef utils.dpnp_descriptor dpnp_trace(utils.dpnp_descriptor arr, offset=0, axis
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float):
+cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=dpnp.float):
     if M is None:
         M = N
 
@@ -460,94 +424,6 @@ cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k):
-    cdef shape_type_c input_shape = m.shape
-    cdef shape_type_c result_shape
-
-    if m.ndim == 1:
-        result_shape = (m.shape[0], m.shape[0])
-    else:
-        result_shape = m.shape
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL_EXT, param1_type, param1_type)
-
-    m_obj = m.get_array()
-
-    # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
-                                                                       kernel_data.return_type,
-                                                                       None,
-                                                                       device=m_obj.sycl_device,
-                                                                       usm_type=m_obj.usm_type,
-                                                                       sycl_queue=m_obj.sycl_queue)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    m.get_data(),
-                                                    result.get_data(),
-                                                    k,
-                                                    input_shape.data(),
-                                                    result_shape.data(),
-                                                    m.ndim,
-                                                    result.ndim,
-                                                    NULL)  # dep_events_ref
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k):
-    cdef shape_type_c input_shape = m.shape
-    cdef shape_type_c result_shape
-
-    if m.ndim == 1:
-        result_shape = (m.shape[0], m.shape[0])
-    else:
-        result_shape = m.shape
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU_EXT, param1_type, param1_type)
-
-    m_obj = m.get_array()
-
-    # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
-                                                                       kernel_data.return_type,
-                                                                       None,
-                                                                       device=m_obj.sycl_device,
-                                                                       usm_type=m_obj.usm_type,
-                                                                       sycl_queue=m_obj.sycl_queue)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    m.get_data(),
-                                                    result.get_data(),
-                                                    k,
-                                                    input_shape.data(),
-                                                    result_shape.data(),
-                                                    m.ndim,
-                                                    result.ndim,
-                                                    NULL)  # dep_events_ref
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
 cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int increasing):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER_EXT, param1_type, DPNP_FT_NONE)
diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx
index e0b928ddf025..b6ac36db412b 100644
--- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -166,46 +166,28 @@ cpdef utils.dpnp_descriptor dpnp_any(utils.dpnp_descriptor array1):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] == input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] > input2.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor x1_obj,
+                                       utils.dpnp_descriptor x2_obj,
+                                       object dtype=None,
+                                       utils.dpnp_descriptor out=None,
+                                       object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="equal")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor x1_obj,
+                                         utils.dpnp_descriptor x2_obj,
+                                         object dtype=None,
+                                         utils.dpnp_descriptor out=None,
+                                         object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater")
 
-cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] >= input2.get_pyobj()[i])
 
-    return result
+cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor x1_obj,
+                                               utils.dpnp_descriptor x2_obj,
+                                               object dtype=None,
+                                               utils.dpnp_descriptor out=None,
+                                               object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater_equal")
 
 
 cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1,
@@ -272,103 +254,56 @@ cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] < input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] <= input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1):
-    input1_obj = input1.get_array()
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=input1_obj.sycl_device,
-                                                                             usm_type=input1_obj.usm_type,
-                                                                             sycl_queue=input1_obj.sycl_queue)
-
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor x1_obj,
+                                      utils.dpnp_descriptor x2_obj,
+                                      object dtype=None,
+                                      utils.dpnp_descriptor out=None,
+                                      object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LESS_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj,
+                                            utils.dpnp_descriptor x2_obj,
+                                            object dtype=None,
+                                            utils.dpnp_descriptor out=None,
+                                            object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LESS_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less_equal")
 
-cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
 
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor x1_obj,
+                                             utils.dpnp_descriptor x2_obj,
+                                             object dtype=None,
+                                             utils.dpnp_descriptor out=None,
+                                             object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_AND_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_and")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor x_obj,
+                                             object dtype=None,
+                                             utils.dpnp_descriptor out=None,
+                                             object where=True):  # single-input wrapper: goes through the 1in_1out strided caller
+    return call_fptr_1in_1out_strides(DPNP_FN_LOGICAL_NOT_EXT, x_obj, dtype, out, where, func_name="logical_not")
 
-cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
 
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor x1_obj,
+                                            utils.dpnp_descriptor x2_obj,
+                                            object dtype=None,
+                                            utils.dpnp_descriptor out=None,
+                                            object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_OR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_or")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor x1_obj,
+                                             utils.dpnp_descriptor x2_obj,
+                                             object dtype=None,
+                                             utils.dpnp_descriptor out=None,
+                                             object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_XOR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_xor")
 
-cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] != input2.get_pyobj()[i])
 
-    return result
+cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor x1_obj,
+                                           utils.dpnp_descriptor x2_obj,
+                                           object dtype=None,
+                                           utils.dpnp_descriptor out=None,
+                                           object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_NOT_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="not_equal")
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 82c271fa7d90..c50ed9792720 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -140,7 +140,10 @@ def __bool__(self):
         return self._array_obj.__bool__()
 
  # '__class__',
- # '__complex__',
+
+    def __complex__(self):  # complex(x) protocol, answered by the wrapped `_array_obj`
+        return self._array_obj.__complex__()
+
  # '__contains__',
  # '__copy__',
  # '__deepcopy__',
@@ -150,6 +153,12 @@ def __bool__(self):
  # '__divmod__',
  # '__doc__',
 
+    def __dlpack__(self, stream=None):  # DLPack export: forwarded, together with `stream`, to `_array_obj`
+        return self._array_obj.__dlpack__(stream=stream)
+
+    def __dlpack_device__(self):  # DLPack device query: forwarded to `_array_obj`
+        return self._array_obj.__dlpack_device__()
+
     def __eq__(self, other):
         return dpnp.equal(self, other)
 
@@ -187,7 +196,10 @@ def __gt__(self, other):
  # '__imatmul__',
  # '__imod__',
  # '__imul__',
- # '__index__',
+
+    def __index__(self):  # operator.index(x) protocol, answered by the wrapped `_array_obj`
+        return self._array_obj.__index__()
+
  # '__init__',
  # '__init_subclass__',
 
@@ -247,7 +259,10 @@ def __radd__(self, other):
  # '__rdivmod__',
  # '__reduce__',
  # '__reduce_ex__',
- # '__repr__',
+
+    def __repr__(self):  # text comes from `dpt.usm_ndarray_repr`, called with prefix "array"
+        return dpt.usm_ndarray_repr(self._array_obj, prefix="array")
+
  # '__rfloordiv__',
  # '__rlshift__',
 
@@ -264,7 +279,9 @@ def __rmul__(self, other):
  # '__rpow__',
  # '__rrshift__',
  # '__rshift__',
- # '__rsub__',
+
+    def __rsub__(self, other):  # reflected subtraction: evaluates `other - self`
+        return dpnp.subtract(other, self)
 
     def __rtruediv__(self, other):
         return dpnp.true_divide(other, self)
@@ -292,8 +309,7 @@ def __str__(self):
 
         """
 
-        return str(self.asnumpy())
-
+        return self._array_obj.__str__()
 
     def __sub__(self, other):
         return dpnp.subtract(self, other)
@@ -305,6 +321,16 @@ def __truediv__(self, other):
 
  # '__xor__',
 
+    @staticmethod
+    def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray):
+        """Wrap `usm_ary` in a new `dpnp_array` (bypassing `__init__`); raise TypeError for other types."""
+        if isinstance(usm_ary, dpt.usm_ndarray):
+            wrapped = dpnp_array.__new__(dpnp_array)
+            wrapped._array_obj = usm_ary
+            return wrapped
+        message = f"Expected dpctl.tensor.usm_ndarray, got {type(usm_ary)}"
+        raise TypeError(message)
+
     def all(self, axis=None, out=None, keepdims=False):
         """
         Returns True if all elements evaluate to True.
@@ -493,7 +519,7 @@ def conj(self):
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return dpnp.conjugate(self)
@@ -506,7 +532,7 @@ def conjugate(self):
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return dpnp.conjugate(self)
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 93ab716eb59a..75e20f8a0cb6 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,11 @@
     "arange",
     "asarray",
     "empty",
+    "eye",
     "full",
-    "ones"
+    "ones",
+    "tril",
+    "triu",
     "zeros",
 ]
 
@@ -150,6 +153,33 @@ def full(shape,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
+def eye(N,
+        M=None,
+        /,
+        *,
+        k=0,
+        dtype=None,
+        order="C",
+        device=None,
+        usm_type="device",
+        sycl_queue=None):
+    """Creates `dpnp_array` with ones on the `k`th diagonal."""
+    # Validate input parameters before passing them into `dpctl.tensor` module.
+    dpu.validate_usm_type(usm_type, allow_none=False)
+    sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device)
+    if order is None:
+        order = 'C'
+
+    array_obj = dpt.eye(N,
+                        M,
+                        k=k,
+                        dtype=dtype,
+                        order=order,
+                        usm_type=usm_type,
+                        sycl_queue=sycl_queue_normalized)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
+
+
 def ones(shape,
          *,
          dtype=None,
@@ -172,6 +202,18 @@ def ones(shape,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
+def tril(x1, /, *, k=0):
+    """Creates `dpnp_array` as lower triangular part of an input array."""
+    array_obj = dpt.tril(x1.get_array() if isinstance(x1, dpnp_array) else x1, k)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
+
+
+def triu(x1, /, *, k=0):
+    """Creates `dpnp_array` as upper triangular part of an input array."""
+    array_obj = dpt.triu(x1.get_array() if isinstance(x1, dpnp_array) else x1, k)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
+
+
 def zeros(shape,
           *,
           dtype=None,
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 4e791ad0eaf9..b7cdef8cc615 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -61,15 +61,18 @@
     "asnumpy",
     "astype",
     "convert_single_elem_array_to_scalar",
+    "default_float_type",
     "dpnp_queue_initialize",
     "dpnp_queue_is_cpu",
+    "from_dlpack",
     "get_dpnp_descriptor",
     "get_include",
     "get_normalized_queue_device"
 ]
 
 from dpnp import (
-    isscalar
+    isscalar,
+    float64
 )
 
 from dpnp.dpnp_iface_arraycreation import *
@@ -191,9 +194,64 @@ def convert_single_elem_array_to_scalar(obj, keepdims=False):
     return obj
 
 
+def default_float_type(device=None, sycl_queue=None):
+    """
+    Return the floating-point type DPNP uses by default for a given device.
+
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        Array API notion of the device on which an array of the default floating type
+        could be created. Accepted values are ``None`` (the default), a oneAPI filter
+        selector string, a :class:`dpctl.SyclDevice` instance for a non-partitioned
+        SYCL device, a :class:`dpctl.SyclQueue` instance, or a `Device` object taken
+        from the :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+        Passing ``None`` selects a default device.
+    sycl_queue : {None, SyclQueue}, optional
+        SYCL queue that could be used to create an array of the default floating type.
+        With ``None`` (the default) the queue is taken from the `device` keyword
+        when that is given, otherwise a default queue is used.
+
+    Returns
+    -------
+    dt : dtype
+        Result of `map_dtype_to_device` applied to `float64` and the queue's SYCL device.
+
+    """
+
+    queue = get_normalized_queue_device(sycl_queue=sycl_queue, device=device)
+    return map_dtype_to_device(float64, queue.sycl_device)
+
+
+def from_dlpack(obj, /):
+    """
+    Build a dpnp array out of any Python object that supports the ``__dlpack__``
+    protocol.
+
+    See https://dmlc.github.io/dlpack/latest/ for more details.
+
+    Parameters
+    ----------
+    obj : object
+        Array-like Python object exposing both the ``__dlpack__`` and
+        ``__dlpack_device__`` methods.
+
+    Returns
+    -------
+    out : dpnp_array
+        A new dpnp array wrapping the array that ``dpt.from_dlpack(obj)`` returns;
+        it holds the data of `obj` and is placed on the same device as `obj`.
+
+    """
+
+    usm_array = dpt.from_dlpack(obj)
+    return dpnp_array._create_from_usm_ndarray(usm_array)
+
+
 def get_dpnp_descriptor(ext_obj,
                         copy_when_strides=True,
                         copy_when_nondefault_queue=True,
+                        alloc_usm_type=None,
                         alloc_queue=None):
     """
     Return True:
@@ -214,9 +272,9 @@ def get_dpnp_descriptor(ext_obj,
         return False
 
     # If input object is a scalar, it means it was allocated on host memory.
-    # We need to copy it to device memory according to compute follows data paradigm.
+    # We need to copy it to USM memory according to compute follows data paradigm.
     if isscalar(ext_obj):
-        ext_obj = array(ext_obj, sycl_queue=alloc_queue)
+        ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
 
     # while dpnp functions have no implementation with strides support
     # we need to create a non-strided copy
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 5fb4d8c7a4da..5b062a346b97 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -42,12 +42,14 @@
 
 import numpy
 import dpnp
+import operator
 
 import dpnp.config as config
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 
 import dpnp.dpnp_container as dpnp_container
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -530,7 +532,7 @@ def empty_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`
     Parameter ``order`` is supported with values ``"C"`` or ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -552,7 +554,7 @@ def empty_like(x1,
 
     """
 
-    if not isinstance(x1, dpnp.ndarray):
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
@@ -572,31 +574,43 @@ def empty_like(x1,
     return call_origin(numpy.empty_like, x1, dtype, order, subok, shape)
 
 
-def eye(N, M=None, k=0, dtype=None, order='C', **kwargs):
+def eye(N,
+        M=None,
+        /,
+        *,
+        k=0,
+        dtype=None,
+        order="C",
+        like=None,
+        device=None,
+        usm_type="device",
+        sycl_queue=None):
     """
     Return a 2-D array with ones on the diagonal and zeros elsewhere.
     For full documentation refer to :obj:`numpy.eye`.
 
     Limitations
     -----------
-    Input array is supported as :obj:`dpnp.ndarray`.
-    Parameters ``order`` is supported only with default value.
+    Parameter ``order`` is supported only with values ``"C"`` and ``"F"``.
+    Parameter ``like`` is supported only with default value ``None``.
+    Otherwise the function will be executed sequentially on CPU.
+
     """
-    if (not use_origin_backend()):
-        if not isinstance(N, (int, dpnp.int, dpnp.int32, dpnp.int64)):
-            pass
-        elif M is not None and not isinstance(M, (int, dpnp.int, dpnp.int32, dpnp.int64)):
-            pass
-        elif not isinstance(k, (int, dpnp.int, dpnp.int32, dpnp.int64)):
-            pass
-        elif order != 'C':
-            pass
-        elif len(kwargs) != 0:
-            pass
-        else:
-            return dpnp_eye(N, M=M, k=k, dtype=dtype).get_pyobj()
+    if order not in ('C', 'c', 'F', 'f', None):
+        pass
+    elif like is not None:
+        pass
+    else:
+        return dpnp_container.eye(N,
+                                  M,
+                                  k=k,
+                                  dtype=dtype,
+                                  order=order,
+                                  device=device,
+                                  usm_type=usm_type,
+                                  sycl_queue=sycl_queue)
 
-    return call_origin(numpy.eye, N, M=M, k=k, dtype=dtype, order=order, **kwargs)
+    return call_origin(numpy.eye, N, M, k=k, dtype=dtype, order=order, like=like)  # forward the caller's `like` to the NumPy fallback
 
 
 def frombuffer(buffer, **kwargs):
@@ -750,7 +764,7 @@ def full_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`
     Parameter ``order`` is supported only with values ``"C"`` and ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -771,7 +785,7 @@ def full_like(x1,
     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
 
     """
-    if not isinstance(x1, dpnp.ndarray):
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
@@ -859,10 +873,8 @@ def identity(n, dtype=None, *, like=None):
         elif n < 0:
             pass
         else:
-            if dtype is None:
-                sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None)
-                dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device)
-            return dpnp_identity(n, dtype).get_pyobj()
+            _dtype = dpnp.default_float_type() if dtype is None else dtype
+            return dpnp_identity(n, _dtype).get_pyobj()
 
     return call_origin(numpy.identity, n, dtype=dtype, like=like)
 
@@ -1179,7 +1191,7 @@ def ones_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`
     Parameter ``order`` is supported with values ``"C"`` or ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -1201,7 +1213,7 @@ def ones_like(x1,
     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
 
     """
-    if not isinstance(x1, dpnp.ndarray):
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
@@ -1280,7 +1292,7 @@ def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None):
     return call_origin(numpy.trace, x1, offset, axis1, axis2, dtype, out)
 
 
-def tri(N, M=None, k=0, dtype=numpy.float, **kwargs):
+def tri(N, M=None, k=0, dtype=dpnp.float, **kwargs):
     """
     An array with ones at and below the given diagonal and zeros elsewhere.
 
@@ -1315,15 +1327,13 @@ def tri(N, M=None, k=0, dtype=numpy.float, **kwargs):
         elif not isinstance(k, int):
             pass
         else:
-            if dtype is numpy.float:
-                sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None)
-                dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device)
-            return dpnp_tri(N, M, k, dtype).get_pyobj()
+            _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype
+            return dpnp_tri(N, M, k, _dtype).get_pyobj()
 
     return call_origin(numpy.tri, N, M, k, dtype, **kwargs)
 
 
-def tril(x1, k=0):
+def tril(x1, /, *, k=0):
     """
     Lower triangle of an array.
 
@@ -1331,6 +1341,12 @@ def tril(x1, k=0):
 
     For full documentation refer to :obj:`numpy.tril`.
 
+    Limitations
+    -----------
+    Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions.
+    Parameter `k` is supported only of integer data type.
+    Otherwise the function will be executed sequentially on CPU.
+
     Examples
     --------
     >>> import dpnp as np
@@ -1342,17 +1358,25 @@ def tril(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        if not isinstance(k, int):
-            pass
-        else:
-            return dpnp_tril(x1_desc, k).get_pyobj()
+    _k = None
+    try:
+        _k = operator.index(k)
+    except TypeError:
+        pass
+
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
+        pass
+    elif x1.ndim < 2:
+        pass
+    elif _k is None:
+        pass
+    else:
+        return dpnp_container.tril(x1, k=_k)
 
     return call_origin(numpy.tril, x1, k)
 
 
-def triu(x1, k=0):
+def triu(x1, /, *, k=0):
     """
     Upper triangle of an array.
 
@@ -1361,6 +1385,12 @@ def triu(x1, k=0):
 
     For full documentation refer to :obj:`numpy.triu`.
 
+    Limitations
+    -----------
+    Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions.
+    Parameter `k` is supported only of integer data type.
+    Otherwise the function will be executed sequentially on CPU.
+
     Examples
     --------
     >>> import dpnp as np
@@ -1372,12 +1402,20 @@ def triu(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        if not isinstance(k, int):
-            pass
-        else:
-            return dpnp_triu(x1_desc, k).get_pyobj()
+    _k = None
+    try:
+        _k = operator.index(k)
+    except TypeError:
+        pass
+
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
+        pass
+    elif x1.ndim < 2:
+        pass
+    elif _k is None:
+        pass
+    else:
+        return dpnp_container.triu(x1, k=_k)
 
     return call_origin(numpy.triu, x1, k)
 
@@ -1494,7 +1532,7 @@ def zeros_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`
     Parameter ``order`` is supported with values ``"C"`` or ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -1515,8 +1553,8 @@ def zeros_like(x1,
     >>> [i for i in np.zeros_like(x)]
     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 
-"""
-    if not isinstance(x1, dpnp.ndarray):
+    """
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 0f1e1b5fc0e5..e94b0f6c1efb 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -219,18 +219,32 @@ def any(x1, axis=None, out=None, keepdims=False):
     return call_origin(numpy.any, x1, axis, out, keepdims)
 
 
-def equal(x1, x2):
+def equal(x1,
+          x2,
+          /,
+          out=None,
+          *,
+          where=True,
+          dtype=None,
+          subok=True):
     """
-    Return (x1 == x2) element-wise.
+    Return the truth value of (x1 == x2) element-wise.
 
     For full documentation refer to :obj:`numpy.equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Parameter ``x1`` is supported as :obj:`dpnp.ndarray`.
-    Parameter ``x2`` is supported as either :obj:`dpnp.ndarray` or int.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Sizes, shapes and data types of input arrays ``x1`` and ``x2`` are supported to be equal.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
+    Otherwise the function will be executed sequentially on CPU.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -250,33 +264,57 @@ def equal(x1, x2):
     [True, True, False]
 
     """
-
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size != x2_desc.size:
-    #         pass
-    #     elif x1_desc.dtype != x2_desc.dtype:
-    #         pass
-    #     elif x1_desc.shape != x2_desc.shape:
-    #         pass
-    #     else:
-    #         return dpnp_equal(x1_desc, x2_desc).get_pyobj()
-
+    
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.equal, x1, x2)
 
 
-def greater(x1, x2):
+def greater(x1,
+            x2,
+            /,
+            out=None,
+            *,
+            where=True,
+            dtype=None,
+            subok=True):
     """
-    Return (x1 > x2) element-wise.
+    Return the truth value of (x1 > x2) element-wise.
 
     For full documentation refer to :obj:`numpy.greater`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -297,30 +335,56 @@ def greater(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_greater(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_greater(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.greater, x1, x2)
 
 
-def greater_equal(x1, x2):
+def greater_equal(x1,
+                  x2,
+                  /,
+                  out=None,
+                  *,
+                  where=True,
+                  dtype=None,
+                  subok=True):
     """
-    Return (x1 >= x2) element-wise.
+    Return the truth value of (x1 >= x2) element-wise.
 
     For full documentation refer to :obj:`numpy.greater_equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -341,16 +405,27 @@ def greater_equal(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.greater_equal, x1, x2)
 
 
@@ -532,17 +607,32 @@ def isnan(x1, out=None, **kwargs):
     return call_origin(numpy.isnan, x1, out, **kwargs)
 
 
-def less(x1, x2):
+def less(x1,
+         x2,
+         /,
+         out=None,
+         *,
+         where=True,
+         dtype=None,
+         subok=True):
     """
-    Return (x1 < x2) element-wise.
+    Return the truth value of (x1 < x2) element-wise.
 
     For full documentation refer to :obj:`numpy.less`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -563,30 +653,56 @@ def less(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_less(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_less(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.less, x1, x2)
 
 
-def less_equal(x1, x2):
+def less_equal(x1,
+               x2,
+               /,
+               out=None,
+               *,
+               where=True,
+               dtype=None,
+               subok=True):
     """
-    Return (x1 <= x2) element-wise.
+    Return the truth value of (x1 <= x2) element-wise.
 
     For full documentation refer to :obj:`numpy.less_equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -607,32 +723,56 @@ def less_equal(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_less_equal(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_less_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.less_equal, x1, x2)
 
 
-def logical_and(x1, x2, out=None, **kwargs):
+def logical_and(x1,
+                x2,
+                /,
+                out=None,
+                *,
+                where=True,
+                dtype=None,
+                subok=True):
     """
     Compute the truth value of x1 AND x2 element-wise.
 
     For full documentation refer to :obj:`numpy.logical_and`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise logical comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Input arrays are supported as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -652,30 +792,55 @@ def logical_and(x1, x2, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_and(x1_desc, x2_desc).get_pyobj()
-
-    return call_origin(numpy.logical_and, x1, x2, out, **kwargs)
-
-
-def logical_not(x1, out=None, **kwargs):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_logical_and(x1_desc, x2_desc).get_pyobj()
+    return call_origin(numpy.logical_and, x1, x2)
+
+
+def logical_not(x,
+                /,
+                out=None,
+                *,
+                where=True,
+                dtype=None,
+                subok=True):
     """
     Compute the truth value of NOT x element-wise.
 
     For full documentation refer to :obj:`numpy.logical_not`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Boolean result with the same shape as `x` of the NOT operation
+        on elements of `x`.
+
     Limitations
     -----------
-    Input array is supported as :obj:`dpnp.ndarray`.
+    Parameter `x` is only supported as :class:`dpnp.ndarray`.
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data type is limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -693,29 +858,47 @@ def logical_not(x1, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # if x1_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_not(x1_desc).get_pyobj()
-
-    return call_origin(numpy.logical_not, x1, out, **kwargs)
-
-
-def logical_or(x1, x2, out=None, **kwargs):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    else:
+        x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False)
+        if x1_desc:
+            return dpnp_logical_not(x1_desc).get_pyobj()
+    return call_origin(numpy.logical_not, x)
+
+
+def logical_or(x1,
+               x2,
+               /,
+               out=None,
+               *,
+               where=True,
+               dtype=None,
+               subok=True):
     """
     Compute the truth value of x1 OR x2 element-wise.
 
     For full documentation refer to :obj:`numpy.logical_or`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise logical comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Input arrays are supported as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -735,30 +918,56 @@ def logical_or(x1, x2, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_or(x1_desc, x2_desc).get_pyobj()
-
-    return call_origin(numpy.logical_or, x1, x2, out, **kwargs)
-
-
-def logical_xor(x1, x2, out=None, **kwargs):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_logical_or(x1_desc, x2_desc).get_pyobj()
+    return call_origin(numpy.logical_or, x1, x2)
+
+
+def logical_xor(x1,
+               x2,
+               /,
+               out=None,
+               *,
+               where=True,
+               dtype=None,
+               subok=True):
     """
-    Compute the truth value of x1 XOR x2, element-wise.
+    Compute the truth value of x1 XOR x2 element-wise.
 
     For full documentation refer to :obj:`numpy.logical_xor`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise logical comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Input arrays are supported as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -778,29 +987,56 @@ def logical_xor(x1, x2, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj()
-
-    return call_origin(numpy.logical_xor, x1, x2, out, **kwargs)
-
-
-def not_equal(x1, x2):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj()
+    return call_origin(numpy.logical_xor, x1, x2)
+
+
+def not_equal(x1,
+              x2,
+              /,
+              out=None,
+              *,
+              where=True,
+              dtype=None,
+              subok=True):
     """
-    Return (x1 != x2) element-wise.
+    Return the truth value of (x1 != x2) element-wise.
 
     For full documentation refer to :obj:`numpy.not_equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
-    If either ``x1`` or ``x2`` is scalar then other one should be :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -821,16 +1057,25 @@ def not_equal(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         result = dpnp_not_equal(x1_desc, x2_desc).get_pyobj()
-
-    #         return result
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_not_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.not_equal, x1, x2)
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 26b81a67dd95..feff53288cfd 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -154,58 +154,68 @@ def absolute(x1, **kwargs):
     return call_origin(numpy.absolute, x1, **kwargs)
 
 
-def add(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def add(x1,
+        x2,
+        /,
+        out=None,
+        *,
+        where=True,
+        dtype=None,
+        subok=True,
+        **kwargs):
     """
     Add arguments element-wise.
 
     For full documentation refer to :obj:`numpy.add`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The sum of `x1` and `x2`, element-wise.
+
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> a = np.array([1, 2, 3])
-    >>> b = np.array([1, 2, 3])
-    >>> result = np.add(a, b)
-    >>> [x for x in result]
+    >>> import dpnp as dp
+    >>> a = dp.array([1, 2, 3])
+    >>> b = dp.array([1, 2, 3])
+    >>> result = dp.add(a, b)
+    >>> print(result)
     [2, 4, 6]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if kwargs or out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def around(x1, decimals=0, out=None):
@@ -534,55 +544,66 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue):
     return call_origin(numpy.diff, x1, n=n, axis=axis, prepend=prepend, append=append)
 
 
-def divide(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def divide(x1,
+           x2,
+           /,
+           out=None,
+           *,
+           where=True,
+           dtype=None,
+           subok=True,
+           **kwargs):
     """
     Divide arguments element-wise.
 
     For full documentation refer to :obj:`numpy.divide`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The quotient ``x1/x2``, element-wise.
+
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> result = np.divide(np.array([1, -2, 6, -9]), np.array([-2, -2, -2, -2]))
-    >>> [x for x in result]
+    >>> import dpnp as dp
+    >>> result = dp.divide(dp.array([1, -2, 6, -9]), dp.array([-2, -2, -2, -2]))
+    >>> print(result)
     [-0.5, 1.0, -3.0, 4.5]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if kwargs or out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
             return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.divide, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def ediff1d(x1, to_end=None, to_begin=None):
@@ -1093,11 +1114,11 @@ def multiply(x1,
     -------
     y : {dpnp.ndarray, scalar}
         The product of `x1` and `x2`, element-wise.
-        The result is a scalar if both x1 and x2 are scalars.
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the functions will be executed sequentially on CPU.
@@ -1122,18 +1143,20 @@ def multiply(x1,
     elif subok is not True:
         pass
     elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
-        # keep the result in host memory, if both inputs are scalars
-        return x1 * x2
+        # at least either x1 or x2 has to be an array
+        pass
     else:
-        # get a common queue to copy data from the host into a device if any input is scalar
-        queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue)
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.multiply, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def nancumprod(x1, **kwargs):
@@ -1508,60 +1531,69 @@ def sign(x1, **kwargs):
     return call_origin(numpy.sign, x1, **kwargs)
 
 
-def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def subtract(x1,
+             x2,
+             /,
+             out=None,
+             *,
+             where=True,
+             dtype=None,
+             subok=True,
+             **kwargs):
     """
     Subtract arguments, element-wise.
 
     For full documentation refer to :obj:`numpy.subtract`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The difference of `x1` and `x2`, element-wise.
+
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Example
     -------
-    >>> import dpnp as np
-    >>> result = np.subtract(np.array([4, 3]), np.array([2, 7]))
-    >>> [x for x in result]
+    >>> import dpnp as dp
+    >>> result = dp.subtract(dp.array([4, 3]), dp.array([2, 7]))
+    >>> print(result)
     [2, -4]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if kwargs or out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x1_desc and x1_desc.dtype == numpy.bool:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.dtype == numpy.bool:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if x1_desc.dtype == x2_desc.dtype == dpnp.bool:
+                raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, "
+                                "use the bitwise_xor, the `^` operator, or the logical_xor function instead.")
+            return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.subtract, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index 27eaf4a115f5..ab92f8cc6251 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -299,7 +299,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
 
 
-def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
+def histogram(a, bins=10, range=None, density=None, weights=None):
     """
     Compute the histogram of a dataset.
     For full documentation refer to :obj:`numpy.histogram`.
@@ -323,7 +323,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
     1.0
     """
 
-    return call_origin(numpy.histogram, a=a, bins=bins, range=range, normed=normed, weights=weights, density=density)
+    return call_origin(numpy.histogram, a=a, bins=bins, range=range, density=density, weights=weights)
 
 
 def max(x1, axis=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py
index dfcf599bf3be..a39cfa47cd12 100644
--- a/dpnp/dpnp_iface_types.py
+++ b/dpnp/dpnp_iface_types.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -36,12 +36,12 @@
 
 import numpy
 
+
 __all__ = [
     "bool",
     "bool_",
     "complex128",
     "complex64",
-    "default_float_type",
     "dtype",
     "float",
     "float16",
@@ -59,7 +59,7 @@
     "void"
 ]
 
-bool = numpy.bool
+bool = numpy.bool_
 bool_ = numpy.bool_
 complex128 = numpy.complex128
 complex64 = numpy.complex64
@@ -67,18 +67,14 @@
 float16 = numpy.float16
 float32 = numpy.float32
 float64 = numpy.float64
-float = numpy.float
+float = numpy.float_
 int32 = numpy.int32
 int64 = numpy.int64
 integer = numpy.integer
-int = numpy.int
+int = numpy.int_
 longcomplex = numpy.longcomplex
 
 
-def default_float_type():
-    return float64
-
-
 def isscalar(obj):
     """
     Returns True if the type of `obj` is a scalar type.
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
index 0924dae26408..db7127319bb0 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -123,7 +123,7 @@ cdef class dpnp_descriptor:
     cdef void * get_data(self)
 
 
-cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape)
+cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *
 """
 Calculate common shape from input shapes
 """
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index c09bef8ec485..672aa19e4dcb 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,7 @@ This module contains differnt helpers and utilities
 import numpy
 
 import dpctl
-import dpctl.tensor as dpt
+import dpctl.utils as dpu
 
 import dpnp.config as config
 import dpnp.dpnp_container as dpnp_container
@@ -70,7 +70,7 @@ __all__ = [
     "dpnp_descriptor",
     "get_axis_indeces",
     "get_axis_offsets",
-    "get_common_allocation_queue",
+    "get_usm_allocations",
     "_get_linear_index",
     "map_dtype_to_device",
     "normalize_axis",
@@ -163,9 +163,9 @@ def call_origin(function, *args, **kwargs):
         kwargx = convert_item(kwarg)
         kwargs_new[key] = kwargx
 
-    exec_q = dpctl.utils.get_execution_queue(alloc_queues)
+    exec_q = dpu.get_execution_queue(alloc_queues)
     if exec_q is None:
-        exec_q = sycl_queue
+        exec_q = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue)
     # print(f"DPNP call_origin(): bakend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}")
     # TODO need to put array memory into NumPy call
     result_origin = function(*args_new, **kwargs_new)
@@ -220,30 +220,49 @@ def unwrap_array(x1):
     return x1
 
 
-def get_common_allocation_queue(objects):
-    """
-    Given a list of objects returns the queue which can be used for a memory allocation
-    to follow compute follows data paradigm, or returns `None` if the default queue can be used.
-    An exception will be raised, if the paradigm is broked for the given list of objects.
-    """
-    if not isinstance(objects, (list, tuple)):
-        raise TypeError("Expected a list or a tuple, got {}".format(type(objects)))
-    
-    if len(objects) == 0:
+def _get_coerced_usm_type(objects):
+    types_in_use = [obj.usm_type for obj in objects if hasattr(obj, "usm_type")]
+    if len(types_in_use) == 0:
         return None
+    elif len(types_in_use) == 1:
+        return types_in_use[0]
+
+    common_usm_type = dpu.get_coerced_usm_type(types_in_use)
+    if common_usm_type is None:
+        raise ValueError("Input arrays must have coerced USM types")
+    return common_usm_type
 
+
+def _get_common_allocation_queue(objects):
     queues_in_use = [obj.sycl_queue for obj in objects if hasattr(obj, "sycl_queue")]
     if len(queues_in_use) == 0:
         return None
     elif len(queues_in_use) == 1:
         return queues_in_use[0]
 
-    common_queue = dpt.get_execution_queue(queues_in_use)
+    common_queue = dpu.get_execution_queue(queues_in_use)
     if common_queue is None:
         raise ValueError("Input arrays must be allocated on the same SYCL queue")
     return common_queue
 
 
+def get_usm_allocations(objects):
+    """
+    Given a list of objects, returns a tuple of USM type and SYCL queue
+    which can be used for a memory allocation while following the compute-follows-data paradigm,
+    or returns `(None, None)` if the default USM type and SYCL queue can be used.
+    An exception will be raised if the paradigm is broken for the given list of objects.
+
+    """
+
+    if not isinstance(objects, (list, tuple)):
+        raise TypeError("Expected a list or a tuple, got {}".format(type(objects)))
+    
+    if len(objects) == 0:
+        return (None, None)
+    return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects))
+
+
 def map_dtype_to_device(dtype, device):
     """
     Map an input ``dtype`` with type ``device`` may use
@@ -399,7 +418,7 @@ cdef tuple get_shape_dtype(object input_obj):
 
             # shape and dtype does not match with siblings.
             if ((return_shape != elem_shape) or (return_dtype != elem_dtype)):
-                return (elem_shape, numpy.dtype(numpy.object))
+                return (elem_shape, numpy.dtype(numpy.object_))
 
         list_shape.push_back(len(input_obj))
         list_shape.insert(list_shape.end(), return_shape.begin(), return_shape.end())
@@ -429,7 +448,9 @@ cpdef find_common_type(object x1_obj, object x2_obj):
     return numpy.find_common_type(array_types, scalar_types)
 
 
-cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape):
+cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *:
+    cdef shape_type_c input1_shape_orig = input1_shape
+    cdef shape_type_c input2_shape_orig = input2_shape
     cdef shape_type_c result_shape
 
     # ex (8, 1, 6, 1) and (7, 1, 5) -> (8, 1, 6, 1) and (1, 7, 1, 5)
@@ -446,9 +467,9 @@ cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input
         elif input2_shape[it] == 1:
             result_shape.push_back(input1_shape[it])
         else:
-            err_msg = f"{ERROR_PREFIX} in function get_common_shape()"
-            err_msg += f"operands could not be broadcast together with shapes {input1_shape} {input2_shape}"
-            ValueError(err_msg)
+            err_msg = f"{ERROR_PREFIX} in function get_common_shape(): "
+            err_msg += f"operands could not be broadcast together with shapes {input1_shape_orig} {input2_shape_orig}"
+            raise ValueError(err_msg)
 
     return result_shape
 
@@ -629,10 +650,7 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2):
             "could not recognize common USM type for inputs of USM types {} and {}"
             "".format(array1_obj.usm_type, array2_obj.usm_type))
 
-    common_sycl_queue = dpctl.utils.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue))
-    # TODO: refactor, remove when CFD is implemented in all array constructors
-    if common_sycl_queue is None and array1_obj.sycl_context == array2_obj.sycl_context:
-        common_sycl_queue = array1_obj.sycl_queue
+    common_sycl_queue = dpu.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue))
     if common_sycl_queue is None:
         raise ValueError(
             "could not recognize common SYCL queue for inputs in SYCL queues {} and {}"
diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py
index 677f2a7e94bc..ade85bb2fe18 100644
--- a/dpnp/random/dpnp_iface_random.py
+++ b/dpnp/random/dpnp_iface_random.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -98,11 +98,20 @@
 ]
 
 
-def _get_random_state():
-    global _dpnp_random_state
-    if _dpnp_random_state is None:
-        _dpnp_random_state = RandomState()
-    return  _dpnp_random_state
+def _get_random_state(device=None, sycl_queue=None):
+    global _dpnp_random_states
+
+    if not isinstance(_dpnp_random_states, dict):
+         _dpnp_random_states = dict()
+    sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
+    if sycl_queue not in _dpnp_random_states:
+        rs = RandomState(device=device, sycl_queue=sycl_queue)
+        if sycl_queue == rs.get_sycl_queue():
+            _dpnp_random_states[sycl_queue] = rs
+        else:
+            raise RuntimeError("Normalized SYCL queue {} mismatched with one returned by RandmoState {}"
+                               .format(sycl_queue, rs.get_sycl_queue()))
+    return _dpnp_random_states[sycl_queue]
 
 
 def beta(a, b, size=None):
@@ -774,20 +783,42 @@ def negative_binomial(n, p, size=None):
     return call_origin(numpy.random.negative_binomial, n, p, size)
 
 
-def normal(loc=0.0, scale=1.0, size=None, usm_type='device'):
+def normal(loc=0.0,
+           scale=1.0,
+           size=None,
+           device=None,
+           usm_type="device",
+           sycl_queue=None):
     """
-    Normal distribution.
-
     Draw random samples from a normal (Gaussian) distribution.
 
     For full documentation refer to :obj:`numpy.random.normal`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Drawn samples from the parameterized normal distribution.
+        Output array data type is :obj:`dpnp.float64` if device supports it,
+        or :obj:`dpnp.float32` otherwise.
+
     Limitations
     -----------
-    Parameters ``loc`` and ``scale`` are supported as scalar.
+    Parameters `loc` and `scale` are supported as scalar.
     Otherwise, :obj:`numpy.random.normal(loc, scale, size)` samples are drawn.
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameter `dtype` is supported only as :obj:`dpnp.float32`, :obj:`dpnp.float64` or ``None``.
 
     Examples
     --------
@@ -796,11 +827,9 @@ def normal(loc=0.0, scale=1.0, size=None, usm_type='device'):
     >>> s = dpnp.random.normal(mu, sigma, 1000)
 
     """
-    return _get_random_state().normal(loc=loc,
-                                      scale=scale,
-                                      size=size,
-                                      dtype=None,
-                                      usm_type=usm_type)
+
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.normal(loc=loc, scale=scale, size=size, dtype=None, usm_type=usm_type)
 
 
 def noncentral_chisquare(df, nonc, size=None):
@@ -986,7 +1015,11 @@ def power(a, size=None):
     return call_origin(numpy.random.power, a, size)
 
 
-def rand(d0, *dn, usm_type="device"):
+def rand(d0,
+         *dn,
+         device=None,
+         usm_type="device",
+         sycl_queue=None):
     """
     Random values in a given shape.
 
@@ -995,10 +1028,24 @@ def rand(d0, *dn, usm_type="device"):
 
     For full documentation refer to :obj:`numpy.random.rand`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Random values in a given shape.
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1012,20 +1059,48 @@ def rand(d0, *dn, usm_type="device"):
 
     """
 
-    return _get_random_state().rand(d0, *dn, usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.rand(d0, *dn, usm_type=usm_type)
 
 
-def randint(low, high=None, size=None, dtype=int, usm_type="device"):
+def randint(low,
+            high=None,
+            size=None,
+            dtype=int,
+            device=None,
+            usm_type="device",
+            sycl_queue=None):
     """
     Return random integers from `low` (inclusive) to `high` (exclusive).
 
     For full documentation refer to :obj:`numpy.random.randint`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        `size`-shaped array of random integers from the appropriate distribution,
+        or a single such random int if `size` is not provided.
+        Output array data type is the same as input `dtype`.
+
     Limitations
     -----------
-    Parameters ``low`` and ``high`` are supported only as scalar.
-    Parameter ``dtype`` is supported only as `int`.
-    Otherwise, :obj:`numpy.random.randint(low, high, size, dtype)` samples are drawn.
+    Parameters `low` and `high` are supported only as a scalar.
+    Parameter `dtype` is supported only as :obj:`dpnp.int32` or ``int``,
+    but ``int`` value is considered to be exactly equivalent to :obj:`dpnp.int32`.
+    Otherwise, :obj:`numpy.random.RandomState.randint(low, high, size, dtype)` samples are drawn.
 
     Examples
     --------
@@ -1041,23 +1116,39 @@ def randint(low, high=None, size=None, dtype=int, usm_type="device"):
 
     """
 
-    return _get_random_state().randint(low=low,
-                                       high=high,
-                                       size=size,
-                                       dtype=dtype,
-                                       usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.randint(low=low, high=high, size=size, dtype=dtype, usm_type=usm_type)
 
 
-def randn(d0, *dn, usm_type="device"):
+def randn(d0,
+          *dn,
+          device=None,
+          usm_type="device",
+          sycl_queue=None):
     """
     Return a sample (or samples) from the "standard normal" distribution.
 
     For full documentation refer to :obj:`numpy.random.randn`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from
+        the standard normal distribution, or a single such float if no parameters were supplied.
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1075,20 +1166,38 @@ def randn(d0, *dn, usm_type="device"):
 
     """
 
-    return _get_random_state().randn(d0, *dn, usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.randn(d0, *dn, usm_type=usm_type)
 
 
-def random(size=None, usm_type="device"):
+def random(size=None,
+           device=None,
+           usm_type="device",
+           sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
     Alias for random_sample.
 
     For full documentation refer to :obj:`numpy.random.random`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1102,20 +1211,43 @@ def random(size=None, usm_type="device"):
 
     """
 
-    return random_sample(size=size, usm_type=usm_type)
+    return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
-def random_integers(low, high=None, size=None, usm_type="device"):
+def random_integers(low,
+                    high=None,
+                    size=None,
+                    device=None,
+                    usm_type="device",
+                    sycl_queue=None):
     """
     Random integers between `low` and `high`, inclusive.
 
     For full documentation refer to :obj:`numpy.random.random_integers`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        `size`-shaped array of random integers from the appropriate distribution,
+        or a single such random int if `size` is not provided.
+
     Limitations
     -----------
-    Parameters ``low`` and ``high`` are supported as scalar.
-    Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples
-    are drawn.
+    Parameters `low` and `high` are supported as scalar.
+    Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples are drawn.
 
     See Also
     --------
@@ -1134,12 +1266,15 @@ def random_integers(low, high=None, size=None, usm_type="device"):
     elif not dpnp.isscalar(high):
         pass
     else:
-        return randint(low, int(high) + 1, size=size, usm_type=usm_type)
+        return randint(low, int(high) + 1, size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
     return call_origin(numpy.random.random_integers, low, high, size)
 
 
-def random_sample(size=None, usm_type="device"):
+def random_sample(size=None,
+                  device=None,
+                  usm_type="device",
+                  sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
 
@@ -1147,10 +1282,24 @@ def random_sample(size=None, usm_type="device"):
 
     For full documentation refer to :obj:`numpy.random.random_sample`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1164,21 +1313,38 @@ def random_sample(size=None, usm_type="device"):
 
     """
 
-    return _get_random_state().random_sample(size=size,
-                                             usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.random_sample(size=size, usm_type=usm_type)
 
 
-def ranf(size=None, usm_type="device"):
+def ranf(size=None,
+         device=None,
+         usm_type="device",
+         sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
     This is an alias of random_sample.
 
     For full documentation refer to :obj:`numpy.random.ranf`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1193,7 +1359,7 @@ def ranf(size=None, usm_type="device"):
 
     """
 
-    return random_sample(size=size, usm_type=usm_type)
+    return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
 def rayleigh(scale=1.0, size=None):
@@ -1230,17 +1396,34 @@ def rayleigh(scale=1.0, size=None):
     return call_origin(numpy.random.rayleigh, scale, size)
 
 
-def sample(size=None, usm_type="device"):
+def sample(size=None,
+           device=None,
+           usm_type="device",
+           sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
     This is an alias of random_sample.
 
     For full documentation refer to :obj:`numpy.random.sample`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1255,7 +1438,7 @@ def sample(size=None, usm_type="device"):
 
     """
 
-    return random_sample(size=size, usm_type=usm_type)
+    return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
 def shuffle(x1):
@@ -1283,18 +1466,35 @@ def shuffle(x1):
     return
 
 
-def seed(seed=None):
+def seed(seed=None,
+         device=None,
+         sycl_queue=None):
     """
-    Reseed a legacy mt19937 random number generator engine.
+    Reseed a legacy MT19937 random number generator engine.
+
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where an array with generated numbers will be created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for an array with generated numbers.
 
     Limitations
     -----------
-    Parameter ``seed`` is supported as a scalar.
-    Otherwise, the function will use :obj:`numpy.random.seed` on the backend
-    and will be executed on fallback backend.
+    Parameter `seed` is supported as either a scalar or an array of at most three integer scalars.
 
     """
 
+    # reseed the MT19937 random number generator for both RandomState and legacy functionality
+    global _dpnp_random_states
+
+    sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
+    _dpnp_random_states[sycl_queue] = RandomState(seed=seed, sycl_queue=sycl_queue)
+
     if not use_origin_backend(seed):
         # TODO:
         # array_like of ints for `seed`
@@ -1307,10 +1507,6 @@ def seed(seed=None):
         else:
             # TODO:
             # migrate to a single approach with RandomState class
-
-            # update a mt19937 random number for both RandomState and legacy functionality
-            global _dpnp_random_state
-            _dpnp_random_state = RandomState(seed)
             dpnp_rng_srand(seed)
 
     # always reseed numpy engine also
@@ -1405,17 +1601,34 @@ def standard_gamma(shape, size=None):
     return call_origin(numpy.random.standard_gamma, shape, size)
 
 
-def standard_normal(size=None, usm_type="device"):
-    """Standard normal distribution.
-
+def standard_normal(size=None,
+                    device=None,
+                    usm_type="device",
+                    sycl_queue=None):
+    """
     Draw samples from a standard Normal distribution (mean=0, stdev=1).
 
     For full documentation refer to :obj:`numpy.random.standard_normal`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A floating-point array of shape `size` of drawn samples, or a
+        single sample if `size` was not specified.
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1423,7 +1636,9 @@ def standard_normal(size=None, usm_type="device"):
     >>> s = dpnp.random.standard_normal(1000)
 
     """
-    return _get_random_state().standard_normal(size=size, usm_type=usm_type)
+
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.standard_normal(size=size, usm_type=usm_type)
 
 
 def standard_t(df, size=None):
@@ -1506,18 +1721,45 @@ def triangular(left, mode, right, size=None):
     return call_origin(numpy.random.triangular, left, mode, right, size)
 
 
-def uniform(low=0.0, high=1.0, size=None, usm_type='device'):
+def uniform(low=0.0,
+            high=1.0,
+            size=None,
+            device=None,
+            usm_type="device",
+            sycl_queue=None):
     """
     Draw samples from a uniform distribution.
 
+    Samples are uniformly distributed over the half-open interval [low, high) (includes low, but excludes high).
+    In other words, any value within the given interval is equally likely to be drawn by uniform.
+
     For full documentation refer to :obj:`numpy.random.uniform`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), a oneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Drawn samples from the parameterized uniform distribution.
+        Output array data type is :obj:`dpnp.float64` if device supports it,
+        or :obj:`dpnp.float32` otherwise.
+
     Limitations
     -----------
-    Parameters ``low`` and ``high`` are supported as scalar.
-    Otherwise, :obj:`numpy.random.uniform(low, high, size)` samples are drawn.
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters `low` and `high` are supported as a scalar. Otherwise,
+    :obj:`numpy.random.uniform(low, high, size)` samples are drawn.
+    This function has no `dtype` parameter: samples are always requested with the default data type (``dtype=None``).
 
     Examples
     --------
@@ -1530,11 +1772,9 @@ def uniform(low=0.0, high=1.0, size=None, usm_type='device'):
     :obj:`dpnp.random.random` : Floats uniformly distributed over ``[0, 1)``.
 
     """
-    return _get_random_state().uniform(low=low,
-                                       high=high,
-                                       size=size,
-                                       dtype=None,
-                                       usm_type=usm_type)
+
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.uniform(low=low, high=high, size=size, dtype=None, usm_type=usm_type)
 
 
 def vonmises(mu, kappa, size=None):
@@ -1679,4 +1919,4 @@ def zipf(a, size=None):
     return call_origin(numpy.random.zipf, a, size)
 
 
-_dpnp_random_state = None
+_dpnp_random_states = {}
diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py
index 1d4648c31c47..c224553b0cff 100644
--- a/dpnp/random/dpnp_random_state.py
+++ b/dpnp/random/dpnp_random_state.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -76,7 +76,12 @@ class RandomState:
     """
 
     def __init__(self, seed=None, device=None, sycl_queue=None):
-        self._seed = 1 if seed is None else seed
+        if seed is None:
+            # ask NumPy to generate an array of three random integers as default seed value
+            self._seed = numpy.random.randint(low=0, high=numpy.iinfo(numpy.int32).max + 1, size=3)
+        else:
+            self._seed = seed
+
         self._sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
         self._sycl_device = self._sycl_queue.sycl_device
 
@@ -290,7 +295,7 @@ def rand(self, *args, usm_type="device"):
 
     def randint(self, low, high=None, size=None, dtype=int, usm_type="device"):
         """
-        Draw random integers from low (inclusive) to high (exclusive).
+        Draw random integers from `low` (inclusive) to `high` (exclusive).
 
         Return random integers from the “discrete uniform” distribution of the specified type
         in the “half-open” interval [low, high).
@@ -332,7 +337,7 @@ def randint(self, low, high=None, size=None, dtype=int, usm_type="device"):
         if not use_origin_backend(low):
             if not dpnp.isscalar(low):
                 pass
-            elif not dpnp.isscalar(high):
+            elif not (high is None or dpnp.isscalar(high)):
                 pass
             else:
                 _dtype = dpnp.int32 if dtype is int else dpnp.dtype(dtype)
diff --git a/dpnp/version.py b/dpnp/version.py
index 160e8ec963a8..f09ea3c76a75 100644
--- a/dpnp/version.py
+++ b/dpnp/version.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,6 @@
 DPNP version module
 """
 
-__version__: str = '0.11.0'
+__version__: str = '0.11.1'
 
 version: str = __version__
diff --git a/examples/example4.py b/examples/example4.py
index 0790f84d10aa..6705149d52ba 100755
--- a/examples/example4.py
+++ b/examples/example4.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
 """
 for function in [numpy.sqrt, numpy.fabs, numpy.reciprocal, numpy.square, numpy.cbrt, numpy.degrees, numpy.radians]:
     print()
-    for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]:
+    for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]:
         data = numpy.array([1, 2, 3, 4], dtype=test_type)
         result = function(data)
         print(f"input:{data.dtype.name:10}: outout:{result.dtype.name:10}: name:{function.__name__}")
@@ -50,8 +50,8 @@
 """
 for function in [numpy.equal, numpy.arctan2]:
     print()
-    for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]:
-        for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]:
+    for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]:
+        for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]:
             data1 = numpy.array([1, 2, 3, 4], dtype=input1_type)
             data2 = numpy.array([11, 21, 31, 41], dtype=input2_type)
             result = function(data1, data2)
diff --git a/tests/conftest.py b/tests/conftest.py
index 78d3180bac08..22276f125f26 100755
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -77,3 +77,22 @@ def pytest_collection_modifyitems(config, items):
 @pytest.fixture
 def allow_fall_back_on_numpy(monkeypatch):
     monkeypatch.setattr(dpnp.config, '__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__', 0)
+
+@pytest.fixture
+def suppress_divide_numpy_warnings():
+    # divide: treatment for division by zero (infinite result obtained from finite numbers)
+    old_settings = numpy.seterr(divide='ignore')
+    yield
+    numpy.seterr(**old_settings)  # restore the previous settings
+
+@pytest.fixture
+def suppress_invalid_numpy_warnings():
+    # invalid: treatment for invalid floating-point operation
+    # (result is not an expressible number, typically indicates that a NaN was produced)
+    old_settings = numpy.seterr(invalid='ignore')
+    yield
+    numpy.seterr(**old_settings)  # restore the previous settings
+
+@pytest.fixture
+def suppress_divide_invalid_numpy_warnings(suppress_divide_numpy_warnings, suppress_invalid_numpy_warnings):
+    yield
diff --git a/tests/helper.py b/tests/helper.py
new file mode 100644
index 000000000000..17c62cecd289
--- /dev/null
+++ b/tests/helper.py
@@ -0,0 +1,39 @@
+import dpctl
+import dpnp
+
+
+def get_all_dtypes(no_bool=False,
+                   no_float16=True,
+                   no_complex=False,
+                   no_none=False,
+                   device=None):
+    """
+    Build a list of types supported by DPNP based on input flags and device capabilities.
+    """
+
+    dev = dpctl.select_default_device() if device is None else device
+
+    # add boolean type
+    dtypes = [dpnp.bool] if not no_bool else []
+
+    # add integer types
+    dtypes.extend([dpnp.int32, dpnp.int64])
+
+    # add floating types
+    if not no_float16 and dev.has_aspect_fp16:
+        dtypes.append(dpnp.float16)
+
+    dtypes.append(dpnp.float32)
+    if dev.has_aspect_fp64:
+        dtypes.append(dpnp.float64)
+
+    # add complex types
+    if not no_complex:
+        dtypes.append(dpnp.complex64)
+        if dev.has_aspect_fp64:
+            dtypes.append(dpnp.complex128)
+
+    # add None value to validate a default dtype
+    if not no_none:
+        dtypes.append(None)
+    return dtypes
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index b8b02e95bbfb..2f0334077a06 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -35,54 +35,42 @@ tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpct
 
 tests/test_arraymanipulation.py::TestHstack::test_generator
 tests/test_arraymanipulation.py::TestVstack::test_generator
+
 tests/test_dparray.py::test_astype[[]-float64-float64]
 tests/test_dparray.py::test_astype[[]-float64-float32]
 tests/test_dparray.py::test_astype[[]-float64-int64]
 tests/test_dparray.py::test_astype[[]-float64-int32]
 tests/test_dparray.py::test_astype[[]-float64-bool]
-tests/test_dparray.py::test_astype[[]-float64-bool_]
 tests/test_dparray.py::test_astype[[]-float64-complex]
 tests/test_dparray.py::test_astype[[]-float32-float64]
 tests/test_dparray.py::test_astype[[]-float32-float32]
 tests/test_dparray.py::test_astype[[]-float32-int64]
 tests/test_dparray.py::test_astype[[]-float32-int32]
 tests/test_dparray.py::test_astype[[]-float32-bool]
-tests/test_dparray.py::test_astype[[]-float32-bool_]
 tests/test_dparray.py::test_astype[[]-float32-complex]
 tests/test_dparray.py::test_astype[[]-int64-float64]
 tests/test_dparray.py::test_astype[[]-int64-float32]
 tests/test_dparray.py::test_astype[[]-int64-int64]
 tests/test_dparray.py::test_astype[[]-int64-int32]
 tests/test_dparray.py::test_astype[[]-int64-bool]
-tests/test_dparray.py::test_astype[[]-int64-bool_]
 tests/test_dparray.py::test_astype[[]-int64-complex]
 tests/test_dparray.py::test_astype[[]-int32-float64]
 tests/test_dparray.py::test_astype[[]-int32-float32]
 tests/test_dparray.py::test_astype[[]-int32-int64]
 tests/test_dparray.py::test_astype[[]-int32-int32]
 tests/test_dparray.py::test_astype[[]-int32-bool]
-tests/test_dparray.py::test_astype[[]-int32-bool_]
 tests/test_dparray.py::test_astype[[]-int32-complex]
 tests/test_dparray.py::test_astype[[]-bool-float64]
 tests/test_dparray.py::test_astype[[]-bool-float32]
 tests/test_dparray.py::test_astype[[]-bool-int64]
 tests/test_dparray.py::test_astype[[]-bool-int32]
 tests/test_dparray.py::test_astype[[]-bool-bool]
-tests/test_dparray.py::test_astype[[]-bool-bool_]
 tests/test_dparray.py::test_astype[[]-bool-complex]
-tests/test_dparray.py::test_astype[[]-bool_-float64]
-tests/test_dparray.py::test_astype[[]-bool_-float32]
-tests/test_dparray.py::test_astype[[]-bool_-int64]
-tests/test_dparray.py::test_astype[[]-bool_-int32]
-tests/test_dparray.py::test_astype[[]-bool_-bool]
-tests/test_dparray.py::test_astype[[]-bool_-bool_]
-tests/test_dparray.py::test_astype[[]-bool_-complex]
 tests/test_dparray.py::test_astype[[]-complex-float64]
 tests/test_dparray.py::test_astype[[]-complex-float32]
 tests/test_dparray.py::test_astype[[]-complex-int64]
 tests/test_dparray.py::test_astype[[]-complex-int32]
 tests/test_dparray.py::test_astype[[]-complex-bool]
-tests/test_dparray.py::test_astype[[]-complex-bool_]
 tests/test_dparray.py::test_astype[[]-complex-complex]
 
 tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]]
@@ -342,7 +330,6 @@ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_K_strides
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_subok
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_zero_sized_array_strides
-tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_eye
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_full_like_subok
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_ones_like_subok
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_like_subok
@@ -402,7 +389,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype
-tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile
+
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2
@@ -778,18 +765,13 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract
+
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 01a2bb21dc92..e6598904e16f 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -18,7 +18,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
@@ -29,11 +28,9 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13]
@@ -91,6 +88,7 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.],       [4., 5., 6.]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
@@ -103,6 +101,7 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
+
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all
@@ -301,54 +300,42 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{extern
 tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory
 tests/test_arraymanipulation.py::TestHstack::test_generator
 tests/test_arraymanipulation.py::TestVstack::test_generator
+
 tests/test_dparray.py::test_astype[[]-float64-float64]
 tests/test_dparray.py::test_astype[[]-float64-float32]
 tests/test_dparray.py::test_astype[[]-float64-int64]
 tests/test_dparray.py::test_astype[[]-float64-int32]
 tests/test_dparray.py::test_astype[[]-float64-bool]
-tests/test_dparray.py::test_astype[[]-float64-bool_]
 tests/test_dparray.py::test_astype[[]-float64-complex]
 tests/test_dparray.py::test_astype[[]-float32-float64]
 tests/test_dparray.py::test_astype[[]-float32-float32]
 tests/test_dparray.py::test_astype[[]-float32-int64]
 tests/test_dparray.py::test_astype[[]-float32-int32]
 tests/test_dparray.py::test_astype[[]-float32-bool]
-tests/test_dparray.py::test_astype[[]-float32-bool_]
 tests/test_dparray.py::test_astype[[]-float32-complex]
 tests/test_dparray.py::test_astype[[]-int64-float64]
 tests/test_dparray.py::test_astype[[]-int64-float32]
 tests/test_dparray.py::test_astype[[]-int64-int64]
 tests/test_dparray.py::test_astype[[]-int64-int32]
 tests/test_dparray.py::test_astype[[]-int64-bool]
-tests/test_dparray.py::test_astype[[]-int64-bool_]
 tests/test_dparray.py::test_astype[[]-int64-complex]
 tests/test_dparray.py::test_astype[[]-int32-float64]
 tests/test_dparray.py::test_astype[[]-int32-float32]
 tests/test_dparray.py::test_astype[[]-int32-int64]
 tests/test_dparray.py::test_astype[[]-int32-int32]
 tests/test_dparray.py::test_astype[[]-int32-bool]
-tests/test_dparray.py::test_astype[[]-int32-bool_]
 tests/test_dparray.py::test_astype[[]-int32-complex]
 tests/test_dparray.py::test_astype[[]-bool-float64]
 tests/test_dparray.py::test_astype[[]-bool-float32]
 tests/test_dparray.py::test_astype[[]-bool-int64]
 tests/test_dparray.py::test_astype[[]-bool-int32]
 tests/test_dparray.py::test_astype[[]-bool-bool]
-tests/test_dparray.py::test_astype[[]-bool-bool_]
 tests/test_dparray.py::test_astype[[]-bool-complex]
-tests/test_dparray.py::test_astype[[]-bool_-float64]
-tests/test_dparray.py::test_astype[[]-bool_-float32]
-tests/test_dparray.py::test_astype[[]-bool_-int64]
-tests/test_dparray.py::test_astype[[]-bool_-int32]
-tests/test_dparray.py::test_astype[[]-bool_-bool]
-tests/test_dparray.py::test_astype[[]-bool_-bool_]
-tests/test_dparray.py::test_astype[[]-bool_-complex]
 tests/test_dparray.py::test_astype[[]-complex-float64]
 tests/test_dparray.py::test_astype[[]-complex-float32]
 tests/test_dparray.py::test_astype[[]-complex-int64]
 tests/test_dparray.py::test_astype[[]-complex-int32]
 tests/test_dparray.py::test_astype[[]-complex-bool]
-tests/test_dparray.py::test_astype[[]-complex-bool_]
 tests/test_dparray.py::test_astype[[]-complex-complex]
 
 tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
@@ -577,7 +564,6 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype
-tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile
 
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1
@@ -870,7 +856,7 @@ tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_arra
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_length
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_is_equal
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_not_equal
-tests/third_party/cupy/logic_tests/test_comparison.py::TestComparisonOperator::test_binary_npscalar_array
+
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast
@@ -981,6 +967,7 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile
 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2
 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.],       [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
@@ -997,19 +984,13 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract
+
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index 5bb9795bbac8..63435bca11f0 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -1,4 +1,5 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
@@ -8,24 +9,13 @@
 import numpy
 from numpy.testing import (
     assert_allclose,
+    assert_almost_equal,
     assert_array_equal,
     assert_raises
 )
 
 import tempfile
-
-
-# TODO: discuss with DPCTL why no exception on complex128
-def is_dtype_supported(dtype, no_complex_check=False):
-    device = dpctl.SyclQueue().sycl_device
-
-    if dtype is dpnp.float16 and not device.has_aspect_fp16:
-        return False
-    if dtype is dpnp.float64 and not device.has_aspect_fp64:
-        return False
-    if dtype is dpnp.complex128 and not device.has_aspect_fp64 and not no_complex_check:
-        return False
-    return True
+import operator
 
 
 @pytest.mark.parametrize("start",
@@ -37,11 +27,7 @@ def is_dtype_supported(dtype, no_complex_check=False):
 @pytest.mark.parametrize("step",
                          [None, 1, 2.7, -1.6, 100],
                          ids=['None', '1', '2.7', '-1.6', '100'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32],
-                         ids=['complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False))
 def test_arange(start, stop, step, dtype):
     rtol_mult = 2
     if numpy.issubdtype(dtype, numpy.float16):
@@ -50,26 +36,23 @@ def test_arange(start, stop, step, dtype):
 
     func = lambda xp: xp.arange(start, stop=stop, step=step, dtype=dtype)
 
-    if not is_dtype_supported(dtype):
-        if stop is None:
-            _stop, _start = start, 0
-        else:
-            _stop, _start = stop, start
-        _step = 1 if step is None else step
-
-        if _start == _stop:
-            pass
-        elif (_step < 0) ^ (_start < _stop):
-            # exception is raising when dpctl calls a kernel function,
-            # i.e. when resulting array is not empty
-            assert_raises(RuntimeError, func, dpnp)
-            return
-
     exp_array = func(numpy)
     res_array = func(dpnp).asnumpy()
 
-    if numpy.issubdtype(dtype, numpy.floating) or numpy.issubdtype(dtype, numpy.complexfloating):
-        assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(dtype).eps)
+    if dtype is None:
+        _device = dpctl.SyclQueue().sycl_device
+        if not _device.has_aspect_fp64:
+            # numpy allocated array with dtype=float64 by default,
+            # while dpnp might use float32, if float64 isn't supported by device
+            _dtype = dpnp.float32
+            rtol_mult *= 150
+        else:
+            _dtype = dpnp.float64
+    else:
+        _dtype = dtype
+
+    if numpy.issubdtype(_dtype, numpy.floating) or numpy.issubdtype(_dtype, numpy.complexfloating):
+        assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(_dtype).eps)
     else:
         assert_array_equal(exp_array, res_array)
 
@@ -101,43 +84,33 @@ def test_diag(v, k):
 
 
 @pytest.mark.parametrize("N",
-                         [0, 1, 2, 3, 4],
-                         ids=['0', '1', '2', '3', '4'])
+                         [0, 1, 2, 3],
+                         ids=['0', '1', '2', '3'])
 @pytest.mark.parametrize("M",
-                         [None, 0, 1, 2, 3, 4],
-                         ids=['None', '0', '1', '2', '3', '4'])
+                         [None, 0, 1, 2, 3],
+                         ids=['None', '0', '1', '2', '3'])
 @pytest.mark.parametrize("k",
-                         [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
-                         ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
-def test_eye(N, M, k, dtype):
-    expected = numpy.eye(N, M=M, k=k, dtype=dtype)
-    result = dpnp.eye(N, M=M, k=k, dtype=dtype)
-    assert_array_equal(expected, result)
+                         [-4, -3, -2, -1, 0, 1, 2, 3, 4],
+                         ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+@pytest.mark.parametrize("order",
+                         [None, "C", "F"],
+                         ids=['None', 'C', 'F'])
+def test_eye(N, M, k, dtype, order):
+    func = lambda xp: xp.eye(N, M, k=k, dtype=dtype, order=order)
+    assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_frombuffer(dtype):
-    buffer = b'12345678'
+    buffer = b'12345678ABCDEF00'
     func = lambda xp: xp.frombuffer(buffer, dtype=dtype)
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
-    assert_array_equal(func(dpnp), func(numpy))
+    assert_allclose(func(dpnp), func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 def test_fromfile(dtype):
     with tempfile.TemporaryFile() as fh:
         fh.write(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08")
@@ -145,76 +118,44 @@ def test_fromfile(dtype):
 
         func = lambda xp: xp.fromfile(fh, dtype=dtype)
 
-        if not is_dtype_supported(dtype):
-            fh.seek(0)
-            # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-            assert_raises(ValueError, func, dpnp)
-            return
-
         fh.seek(0)
         np_res = func(numpy)
 
         fh.seek(0)
         dpnp_res = func(dpnp)
 
-        assert_array_equal(dpnp_res, np_res)
+        assert_almost_equal(dpnp_res, np_res)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False))
 def test_fromfunction(dtype):
     def func(x, y):
         return x * y
 
     shape = (3, 3)
     call_func = lambda xp: xp.fromfunction(func, shape=shape, dtype=dtype)
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, call_func, dpnp)
-        return
-
     assert_array_equal(call_func(dpnp), call_func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_fromiter(dtype):
     _iter = [1, 2, 3, 4]
     func = lambda xp: xp.fromiter(_iter, dtype=dtype)
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
     assert_array_equal(func(dpnp), func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_fromstring(dtype):
     string = "1 2 3 4"
     func = lambda xp: xp.fromstring(string, dtype=dtype, sep=' ')
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
     assert_array_equal(func(dpnp), func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 @pytest.mark.parametrize("num",
                          [2, 4, 8, 3, 9, 27])
 @pytest.mark.parametrize("endpoint",
@@ -225,11 +166,6 @@ def test_geomspace(dtype, num, endpoint):
 
     func = lambda xp: xp.geomspace(start, stop, num, endpoint, dtype)
 
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
     np_res = func(numpy)
     dpnp_res = func(dpnp)
 
@@ -244,25 +180,14 @@ def test_geomspace(dtype, num, endpoint):
 @pytest.mark.parametrize("n",
                          [0, 1, 4],
                          ids=['0', '1', '4'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32,
-                          numpy.bool, numpy.complex64, numpy.complex128, None],
-                         ids=['float64', 'float32', 'int64', 'int32',
-                              'bool', 'complex64', 'complex128', 'None'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 def test_identity(n, dtype):
     func = lambda xp: xp.identity(n, dtype=dtype)
-
-    if n > 0 and not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_loadtxt(dtype):
     func = lambda xp: xp.loadtxt(fh, dtype=dtype)
 
@@ -270,12 +195,6 @@ def test_loadtxt(dtype):
         fh.write(b"1 2 3 4")
         fh.flush()
 
-        if not is_dtype_supported(dtype):
-            # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-            fh.seek(0)
-            assert_raises(ValueError, func, dpnp)
-            return
-
         fh.seek(0)
         np_res = func(numpy)
         fh.seek(0)
@@ -284,12 +203,8 @@ def test_loadtxt(dtype):
         assert_array_equal(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, None],
-                         ids=['float64', 'float32', 'int64', 'int32', 'None'])
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("offset",
                          [0, 1],
                          ids=['0', '1'])
@@ -317,21 +232,9 @@ def test_trace(array, offset, type, dtype):
     create_array = lambda xp: xp.array(array, type)
     trace_func = lambda xp, x: xp.trace(x, offset=offset, dtype=dtype)
 
-    if not is_dtype_supported(type):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, create_array, dpnp)
-        return
-
     a = create_array(numpy)
     ia = create_array(dpnp)
-
-    if not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, trace_func, dpnp, ia)
-        return
-
-    expected = trace_func(numpy, a)
-    result = trace_func(dpnp, ia)
-    assert_array_equal(expected, result)
+    assert_array_equal(trace_func(dpnp, ia), trace_func(numpy, a))
 
 
 @pytest.mark.parametrize("N",
@@ -343,16 +246,9 @@ def test_trace(array, offset, type, dtype):
 @pytest.mark.parametrize("k",
                          [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
                          ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, float, numpy.int64, numpy.int32, numpy.int, numpy.float, int],
-                         ids=['float64', 'float32', 'numpy.float', 'float', 'int64', 'int32', 'numpy.int', 'int'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_tri(N, M, k, dtype):
     func = lambda xp: xp.tri(N, M, k, dtype=dtype)
-
-    if M > 0 and N > 0 and not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(dpnp), func(numpy))
 
 
@@ -363,48 +259,50 @@ def test_tri_default_dtype():
 
 
 @pytest.mark.parametrize("k",
-                         [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6],
-                         ids=['-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6'])
+                         [-3, -2, -1, 0, 1, 2, 3, 4, 5,
+                          numpy.array(1), dpnp.array(2), dpt.asarray(3)],
+                         ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5',
+                              'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)'])
 @pytest.mark.parametrize("m",
-                         [[0, 1, 2, 3, 4],
-                          [1, 1, 1, 1, 1],
-                          [[0, 0], [0, 0]],
+                         [[[0, 0], [0, 0]],
                           [[1, 2], [1, 2]],
                           [[1, 2], [3, 4]],
                           [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                           [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]],
-                         ids=['[0, 1, 2, 3, 4]',
-                              '[1, 1, 1, 1, 1]',
-                              '[[0, 0], [0, 0]]',
+                         ids=['[[0, 0], [0, 0]]',
                               '[[1, 2], [1, 2]]',
                               '[[1, 2], [3, 4]]',
                               '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]',
                               '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]'])
-def test_tril(m, k):
-    a = numpy.array(m)
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+def test_tril(m, k, dtype):
+    a = numpy.array(m, dtype=dtype)
     ia = dpnp.array(a)
-    expected = numpy.tril(a, k)
-    result = dpnp.tril(ia, k)
+    expected = numpy.tril(a, k=k)
+    result = dpnp.tril(ia, k=k)
     assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("k",
-                         [-4, -3, -2, -1, 0, 1, 2, 3, 4],
-                         ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4'])
+                         [-3, -2, -1, 0, 1, 2, 3, 4, 5,
+                          numpy.array(1), dpnp.array(2), dpt.asarray(3)],
+                         ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5',
+                              'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)'])
 @pytest.mark.parametrize("m",
-                         [[0, 1, 2, 3, 4],
-                          [[1, 2], [3, 4]],
+                         [[[1, 2], [3, 4]],
                           [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                           [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]],
-                         ids=['[0, 1, 2, 3, 4]',
-                              '[[1, 2], [3, 4]]',
+                         ids=['[[1, 2], [3, 4]]',
                               '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]',
                               '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]'])
-def test_triu(m, k):
-    a = numpy.array(m)
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+def test_triu(m, k, dtype):
+    a = numpy.array(m, dtype=dtype)
     ia = dpnp.array(a)
-    expected = numpy.triu(a, k)
-    result = dpnp.triu(ia, k)
+    expected = numpy.triu(a, k=k)
+    result = dpnp.triu(ia, k=k)
     assert_array_equal(expected, result)
 
 
@@ -414,8 +312,8 @@ def test_triu(m, k):
 def test_triu_size_null(k):
     a = numpy.ones(shape=(1, 2, 0))
     ia = dpnp.array(a)
-    expected = numpy.triu(a, k)
-    result = dpnp.triu(ia, k)
+    expected = numpy.triu(a, k=k)
+    result = dpnp.triu(ia, k=k)
     assert_array_equal(expected, result)
 
 
@@ -426,11 +324,7 @@ def test_triu_size_null(k):
                          ids=['[1, 2, 3, 4]',
                               '[]',
                               '[0, 3, 5]'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32,
-                          numpy.bool, numpy.complex64, numpy.complex128],
-                         ids=['float64', 'float32', 'int64', 'int32',
-                              'bool', 'complex64', 'complex128'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 @pytest.mark.parametrize("n",
                          [0, 1, 4, None],
                          ids=['0', '1', '4', 'None'])
@@ -441,18 +335,8 @@ def test_vander(array, dtype, n, increase):
     create_array = lambda xp: xp.array(array, dtype=dtype)
     vander_func = lambda xp, x: xp.vander(x, N=n, increasing=increase)
 
-    if array and not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, create_array, dpnp)
-        return
-
     a_np = numpy.array(array, dtype=dtype)
     a_dpnp = dpnp.array(array, dtype=dtype)
-
-    if array and not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, vander_func, dpnp, a_dpnp)
-        return
-
     assert_array_equal(vander_func(numpy, a_np), vander_func(dpnp, a_dpnp))
 
 
@@ -462,21 +346,12 @@ def test_vander(array, dtype, n, increase):
 @pytest.mark.parametrize("fill_value",
                          [1.5, 2, 1.5+0.j],
                          ids=['1.5', '2', '1.5+0.j'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_full(shape, fill_value, dtype, order):
     func = lambda xp: xp.full(shape, fill_value, dtype=dtype, order=order)
-
-    if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(numpy), func(dpnp))
 
 
@@ -486,23 +361,15 @@ def test_full(shape, fill_value, dtype, order):
 @pytest.mark.parametrize("fill_value",
                          [1.5, 2, 1.5+0.j],
                          ids=['1.5', '2', '1.5+0.j'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_full_like(array, fill_value, dtype, order):
-    a = numpy.array(array)
-    ia = dpnp.array(array)
     func = lambda xp, x: xp.full_like(x, fill_value, dtype=dtype, order=order)
 
-    if ia.size and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp, ia)
-        return
-    
+    a = numpy.array(array)
+    ia = dpnp.array(array)
     assert_array_equal(func(numpy, a), func(dpnp, ia))
 
 
@@ -534,7 +401,9 @@ def test_full_strides():
     assert_array_equal(dpnp.asnumpy(ia), a)
 
 
-@pytest.mark.parametrize("fill_value", [[], (), dpnp.full(0, 0)], ids=['[]', '()', 'dpnp.full(0, 0)'])
+@pytest.mark.parametrize("fill_value",
+                         [[], (), dpnp.full(0, 0)],
+                         ids=['[]', '()', 'dpnp.full(0, 0)'])
 def test_full_invalid_fill_value(fill_value):
     with pytest.raises(ValueError):
         dpnp.full(10, fill_value=fill_value)
@@ -543,120 +412,102 @@ def test_full_invalid_fill_value(fill_value):
 @pytest.mark.parametrize("shape",
                          [(), 0, (0,), (2, 0, 3), (3, 2)],
                          ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_zeros(shape, dtype, order):
-    expected = numpy.zeros(shape, dtype=dtype, order=order)
-    result = dpnp.zeros(shape, dtype=dtype, order=order)
-
-    assert_array_equal(expected, result)
+    func = lambda xp: xp.zeros(shape, dtype=dtype, order=order)
+    assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.parametrize("array",
                          [[], 0,  [1, 2, 3], [[1, 2], [3, 4]]],
                          ids=['[]', '0',  '[1, 2, 3]', '[[1, 2], [3, 4]]'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_zeros_like(array, dtype, order):
+    func = lambda xp, x: xp.zeros_like(x, dtype=dtype, order=order)
+
     a = numpy.array(array)
     ia = dpnp.array(array)
-
-    expected = numpy.zeros_like(a, dtype=dtype, order=order)
-    result = dpnp.zeros_like(ia, dtype=dtype, order=order)
-
-    assert_array_equal(expected, result)
+    assert_array_equal(func(numpy, a), func(dpnp, ia))
 
 
 @pytest.mark.parametrize("shape",
                          [(), 0, (0,), (2, 0, 3), (3, 2)],
                          ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_empty(shape, dtype, order):
-    expected = numpy.empty(shape, dtype=dtype, order=order)
-    result = dpnp.empty(shape, dtype=dtype, order=order)
-
-    assert expected.shape == result.shape
+    func = lambda xp: xp.empty(shape, dtype=dtype, order=order)
+    assert func(numpy).shape == func(dpnp).shape
 
 
 @pytest.mark.parametrize("array",
                          [[], 0,  [1, 2, 3], [[1, 2], [3, 4]]],
                          ids=['[]', '0',  '[1, 2, 3]', '[[1, 2], [3, 4]]'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_empty_like(array, dtype, order):
+    func = lambda xp, x: xp.empty_like(x, dtype=dtype, order=order)
+
     a = numpy.array(array)
     ia = dpnp.array(array)
-
-    expected = numpy.empty_like(a, dtype=dtype, order=order)
-    result = dpnp.empty_like(ia, dtype=dtype, order=order)
-
-    assert expected.shape == result.shape
+    assert func(numpy, a).shape == func(dpnp, ia).shape
 
 
 @pytest.mark.parametrize("shape",
                          [(), 0, (0,), (2, 0, 3), (3, 2)],
                          ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, 
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                         'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_ones(shape, dtype, order):
     func = lambda xp: xp.ones(shape, dtype=dtype, order=order)
-
-    if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.parametrize("array",
                          [[], 0,  [1, 2, 3], [[1, 2], [3, 4]]],
                          ids=['[]', '0',  '[1, 2, 3]', '[[1, 2], [3, 4]]'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, 
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                         'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_ones_like(array, dtype, order):
+    func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order)
+
     a = numpy.array(array)
     ia = dpnp.array(array)
-    func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order)
+    assert_array_equal(func(numpy, a), func(dpnp, ia))
 
-    if ia.size and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp, ia)
-        return
 
-    assert_array_equal(func(numpy, a), func(dpnp, ia))
+@pytest.mark.parametrize(
+    "func, args",
+    [
+        pytest.param("full_like", ['x0', '4']),
+        pytest.param("zeros_like", ['x0']),
+        pytest.param("ones_like", ['x0']),
+        pytest.param("empty_like", ['x0']),
+    ])
+def test_dpctl_tensor_input(func, args):
+    """Call the dpnp and dpctl.tensor `func` on a dpctl.tensor input and compare."""
+    x0 = dpt.reshape(dpt.arange(9), (3,3))
+    # each entry of `args` is a Python expression evaluated with only `x0` in scope
+    new_args = [eval(val, {'x0' : x0}) for val in args]
+    X = getattr(dpt, func)(*new_args)
+    Y = getattr(dpnp, func)(*new_args)
+    # compare by value: `is` on a str literal tests identity (SyntaxWarning in 3.8+)
+    if func == 'empty_like':
+        # only the shapes are compared for empty_like, never the element values
+        assert X.shape == Y.shape
+    else:
+        assert_array_equal(X, Y)
diff --git a/tests/test_dparray.py b/tests/test_dparray.py
index 6ff1672b853b..62a0120f8a33 100644
--- a/tests/test_dparray.py
+++ b/tests/test_dparray.py
@@ -1,15 +1,17 @@
-import dpnp
-import numpy
 import pytest
+from .helper import get_all_dtypes
+
+import dpnp
 import dpctl.tensor as dpt
 
+import numpy
+from numpy.testing import (
+    assert_array_equal
+)
+
 
-@pytest.mark.parametrize("res_dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex'])
-@pytest.mark.parametrize("arr_dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex'])
+@pytest.mark.parametrize("res_dtype", get_all_dtypes())
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes())
 @pytest.mark.parametrize("arr",
                          [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []],
                          ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]'])
@@ -18,12 +20,10 @@ def test_astype(arr, arr_dtype, res_dtype):
     dpnp_array = dpnp.array(numpy_array)
     expected = numpy_array.astype(res_dtype)
     result = dpnp_array.astype(res_dtype)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
-@pytest.mark.parametrize("arr_dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex'])
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes())
 @pytest.mark.parametrize("arr",
                          [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []],
                          ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]'])
@@ -32,7 +32,7 @@ def test_flatten(arr, arr_dtype):
     dpnp_array = dpnp.array(arr, dtype=arr_dtype)
     expected = numpy_array.flatten()
     result = dpnp_array.flatten()
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("shape",
@@ -51,7 +51,7 @@ def test_flags(shape, order):
 
 
 @pytest.mark.parametrize("dtype",
-                         [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool],
+                         [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_],
                          ids=['complex64', 'float32', 'int64', 'int32', 'bool'])
 @pytest.mark.parametrize("strides",
                          [(1, 4) , (4, 1)],
@@ -68,3 +68,134 @@ def test_flags_strides(dtype, order, strides):
     assert usm_array.flags == dpnp_array.flags
     assert numpy_array.flags.c_contiguous == dpnp_array.flags.c_contiguous
     assert numpy_array.flags.f_contiguous == dpnp_array.flags.f_contiguous
+
+def test_print_dpnp_int():
+    """
+    Check repr() and str() of integer dpnp arrays against fixed strings.
+    """
+    # dtype given as the 'i4' type string
+    rendered = repr(dpnp.array([1, 0, 2, -3, -1, 2, 21, -9], dtype='i4'))
+    assert rendered == "array([ 1,  0,  2, -3, -1,  2, 21, -9], dtype=int32)"
+
+    rendered = str(dpnp.array([1, 0, 2, -3, -1, 2, 21, -9], dtype='i4'))
+    assert rendered == "[ 1  0  2 -3 -1  2 21 -9]"
+
+    # int32
+    rendered = repr(dpnp.array([1, -1, 21], dtype=dpnp.int32))
+    assert rendered == "array([ 1, -1, 21], dtype=int32)"
+
+    rendered = str(dpnp.array([1, -1, 21], dtype=dpnp.int32))
+    assert rendered == "[ 1 -1 21]"
+
+    # uint8
+    rendered = repr(dpnp.array([1, 0, 3], dtype=numpy.uint8))
+    assert rendered == "array([1, 0, 3], dtype=uint8)"
+
+    rendered = str(dpnp.array([1, 0, 3], dtype=numpy.uint8))
+    assert rendered == "[1 0 3]"
+
+def test_print_dpnp_float():
+    """Check repr() and str() of floating-point dpnp arrays against fixed strings."""
+
+    # builtin float as dtype
+    rendered = repr(dpnp.array([1, -1, 21], dtype=float))
+    assert rendered == "array([ 1., -1., 21.])"
+
+    rendered = str(dpnp.array([1, -1, 21], dtype=float))
+    assert rendered == "[ 1. -1. 21.]"
+
+    # float32
+    rendered = repr(dpnp.array([1, -1, 21], dtype=dpnp.float32))
+    assert rendered == "array([ 1., -1., 21.], dtype=float32)"
+
+    rendered = str(dpnp.array([1, -1, 21], dtype=dpnp.float32))
+    assert rendered == "[ 1. -1. 21.]"
+
+def test_print_dpnp_complex():
+    """Check repr() and str() of a complex dpnp array against fixed strings."""
+
+    rendered = repr(dpnp.array([1, -1, 21], dtype=complex))
+    assert rendered == "array([ 1.+0.j, -1.+0.j, 21.+0.j])"
+
+    rendered = str(dpnp.array([1, -1, 21], dtype=complex))
+    assert rendered == "[ 1.+0.j -1.+0.j 21.+0.j]"
+
+def test_print_dpnp_boolean():
+    """Check repr() and str() of a boolean dpnp array against fixed strings."""
+
+    rendered = repr(dpnp.array([1, 0, 3], dtype=bool))
+    assert rendered == "array([ True, False,  True])"
+
+    rendered = str(dpnp.array([1, 0, 3], dtype=bool))
+    assert rendered == "[ True False  True]"
+
+def test_print_dpnp_special_character():
+    """
+    Check repr() and str() of dpnp arrays that contain nan or inf.
+    """
+    # NaN
+    rendered = repr(dpnp.array([1., 0., dpnp.nan, 3.]))
+    assert rendered == "array([ 1.,  0., nan,  3.])"
+
+    rendered = str(dpnp.array([1., 0., dpnp.nan, 3.]))
+    assert rendered == "[ 1.  0. nan  3.]"
+
+    # inf
+    rendered = repr(dpnp.array([1., 0., numpy.inf, 3.]))
+    assert rendered == "array([ 1.,  0., inf,  3.])"
+
+    rendered = str(dpnp.array([1., 0., numpy.inf, 3.]))
+    assert rendered == "[ 1.  0. inf  3.]"
+
+def test_print_dpnp_nd():
+    """
+    Check repr() and str() of dpnp arrays of different dimensionality.
+
+    Covers a 10000-element 1-D array (the expected text is abbreviated
+    with "..."), a 2-D array and an empty array of shape (0, 0).
+    """
+    # 1D
+    rendered = repr(dpnp.arange(10000, dtype='float32'))
+    assert rendered == "array([0.000e+00, 1.000e+00, 2.000e+00, ..., 9.997e+03, 9.998e+03,\n       9.999e+03], dtype=float32)"
+
+    rendered = str(dpnp.arange(10000, dtype='float32'))
+    assert rendered == "[0.000e+00 1.000e+00 2.000e+00 ... 9.997e+03 9.998e+03 9.999e+03]"
+
+    # 2D
+    rendered = repr(dpnp.array([[1, 2], [3, 4]], dtype=float))
+    assert rendered == "array([[1., 2.],\n       [3., 4.]])"
+
+    rendered = str(dpnp.array([[1, 2], [3, 4]]))
+    assert rendered == "[[1 2]\n [3 4]]"
+
+    # 0 shape
+    rendered = repr(dpnp.empty(shape=(0, 0)))
+    assert rendered == "array([])"
+
+    rendered = str(dpnp.empty(shape=(0, 0)))
+    assert rendered == "[]"
+
+@pytest.mark.parametrize("func", [bool, float, int, complex])  # Python scalar conversions under test
+@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)])  # shapes holding exactly one element
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True))
+def test_scalar_type_casting(func, shape, dtype):
+    numpy_array = numpy.full(shape, 5, dtype=dtype)  # reference array filled with 5
+    dpnp_array = dpnp.full(shape, 5, dtype=dtype)  # same shape, fill value and dtype in dpnp
+    assert func(numpy_array) == func(dpnp_array)  # both conversions must give the same scalar
+
+
+@pytest.mark.parametrize("method", ["__bool__", "__float__", "__int__", "__complex__"])  # dunder names looked up via getattr
+@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)])  # shapes holding exactly one element
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True, no_none=True))
+def test_scalar_type_casting_by_method(method, shape, dtype):
+    numpy_array = numpy.full(shape, 4.7, dtype=dtype)  # reference array; note the non-integral fill value
+    dpnp_array = dpnp.full(shape, 4.7, dtype=dtype)  # same shape, fill value and dtype in dpnp
+    assert getattr(numpy_array, method)() == getattr(dpnp_array, method)()  # call the dunder directly on each array
+
+
+@pytest.mark.parametrize("shape", [(1,), (1, 1), (1, 1, 1)])
+@pytest.mark.parametrize("index_dtype", [dpnp.int32, dpnp.int64])
+def test_array_as_index(shape, index_dtype):
+    ind_arr = dpnp.ones(shape, dtype=index_dtype)  # dpnp array of ones, used below as an index
+    a = numpy.arange(ind_arr.size + 1)  # numpy array with one element more than ind_arr
+    assert a[tuple(ind_arr)] == a[1]  # indexing numpy with the dpnp array must equal a[1]
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 091cf1345c4e..1a40777afac8 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -3,6 +3,9 @@
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_array_equal
+)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -16,7 +19,7 @@ def test_choose():
 
     expected = numpy.choose([0, 0, 0, 0], [a, b, c])
     result = dpnp.choose([0, 0, 0, 0], [ia, ib, ic])
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("offset",
@@ -47,7 +50,7 @@ def test_diagonal(array, offset):
     ia = dpnp.array(a)
     expected = numpy.diagonal(a, offset)
     result = dpnp.diagonal(ia, offset)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("val",
@@ -71,7 +74,7 @@ def test_fill_diagonal(array, val):
     ia = dpnp.array(a)
     expected = numpy.fill_diagonal(a, val)
     result = dpnp.fill_diagonal(ia, val)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("dimension",
@@ -81,7 +84,7 @@ def test_fill_diagonal(array, val):
 def test_indices(dimension):
     expected = numpy.indices(dimension)
     result = dpnp.indices(dimension)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("array",
@@ -107,7 +110,7 @@ def test_nonzero(array):
     ia = dpnp.array(array)
     expected = numpy.nonzero(a)
     result = dpnp.nonzero(ia)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -137,7 +140,7 @@ def test_place1(arr, mask, vals):
     im = dpnp.array(m)
     numpy.place(a, m, vals)
     dpnp.place(ia, im, vals)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -161,7 +164,7 @@ def test_place2(arr, mask, vals):
     im = dpnp.array(m)
     numpy.place(a, m, vals)
     dpnp.place(ia, im, vals)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -186,7 +189,7 @@ def test_place3(arr, mask, vals):
     im = dpnp.array(m)
     numpy.place(a, m, vals)
     dpnp.place(ia, im, vals)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("v",
@@ -211,7 +214,7 @@ def test_put(array, ind, v):
     ia = dpnp.array(a)
     numpy.put(a, ind, v)
     dpnp.put(ia, ind, v)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("v",
@@ -236,7 +239,7 @@ def test_put2(array, ind, v):
     ia = dpnp.array(a)
     numpy.put(a, ind, v)
     dpnp.put(ia, ind, v)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 def test_put3():
@@ -244,7 +247,7 @@ def test_put3():
     ia = dpnp.array(a)
     dpnp.put(ia, [0, 2], [-44, -55])
     numpy.put(a, [0, 2], [-44, -55])
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -256,7 +259,7 @@ def test_put_along_axis_val_int():
     for axis in range(2):
         numpy.put_along_axis(a, ind_r, 777, axis)
         dpnp.put_along_axis(ai, ind_r_i, 777, axis)
-        numpy.testing.assert_array_equal(a, ai)
+        assert_array_equal(a, ai)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -268,7 +271,7 @@ def test_put_along_axis1():
     for axis in range(3):
         numpy.put_along_axis(a, ind_r, 777, axis)
         dpnp.put_along_axis(ai, ind_r_i, 777, axis)
-        numpy.testing.assert_array_equal(a, ai)
+        assert_array_equal(a, ai)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -280,7 +283,7 @@ def test_put_along_axis2():
     for axis in range(3):
         numpy.put_along_axis(a, ind_r, [100, 200, 300, 400], axis)
         dpnp.put_along_axis(ai, ind_r_i, [100, 200, 300, 400], axis)
-        numpy.testing.assert_array_equal(a, ai)
+        assert_array_equal(a, ai)
 
 
 @pytest.mark.parametrize("vals",
@@ -309,7 +312,7 @@ def test_putmask1(arr, mask, vals):
     iv = dpnp.array(v)
     numpy.putmask(a, m, v)
     dpnp.putmask(ia, im, iv)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("vals",
@@ -334,7 +337,7 @@ def test_putmask2(arr, mask, vals):
     iv = dpnp.array(v)
     numpy.putmask(a, m, v)
     dpnp.putmask(ia, im, iv)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("vals",
@@ -360,7 +363,7 @@ def test_putmask3(arr, mask, vals):
     iv = dpnp.array(v)
     numpy.putmask(a, m, v)
     dpnp.putmask(ia, im, iv)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 def test_select():
@@ -378,7 +381,7 @@ def test_select():
     ichoicelist = [ichoice_val1, ichoice_val2]
     expected = numpy.select(condlist, choicelist)
     result = dpnp.select(icondlist, ichoicelist)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("array_type",
@@ -415,10 +418,9 @@ def test_take(array, indices, array_type, indices_type):
     iind = dpnp.array(ind)
     expected = numpy.take(a, ind)
     result = dpnp.take(ia, iind)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_take_along_axis():
     a = numpy.arange(16).reshape(4, 4)
     ai = dpnp.array(a)
@@ -427,10 +429,9 @@ def test_take_along_axis():
     for axis in range(2):
         expected = numpy.take_along_axis(a, ind_r, axis)
         result = dpnp.take_along_axis(ai, ind_r_i, axis)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_take_along_axis1():
     a = numpy.arange(64).reshape(4, 4, 4)
     ai = dpnp.array(a)
@@ -439,7 +440,7 @@ def test_take_along_axis1():
     for axis in range(3):
         expected = numpy.take_along_axis(a, ind_r, axis)
         result = dpnp.take_along_axis(ai, ind_r_i, axis)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("m",
@@ -454,7 +455,7 @@ def test_take_along_axis1():
 def test_tril_indices(n, k, m):
     result = dpnp.tril_indices(n, k, m)
     expected = numpy.tril_indices(n, k, m)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("k",
@@ -472,7 +473,7 @@ def test_tril_indices_from(array, k):
     ia = dpnp.array(a)
     result = dpnp.tril_indices_from(ia, k)
     expected = numpy.tril_indices_from(a, k)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("m",
@@ -487,7 +488,7 @@ def test_tril_indices_from(array, k):
 def test_triu_indices(n, k, m):
     result = dpnp.triu_indices(n, k, m)
     expected = numpy.triu_indices(n, k, m)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("k",
@@ -505,4 +506,4 @@ def test_triu_indices_from(array, k):
     ia = dpnp.array(a)
     result = dpnp.triu_indices_from(ia, k)
     expected = numpy.triu_indices_from(a, k)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
index dd89a18adbd6..d9784a41558f 100644
--- a/tests/test_linalg.py
+++ b/tests/test_linalg.py
@@ -1,9 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp as inp
 
 import dpctl
+
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_equal
+)
 
 
 def vvsort(val, vec, size, xp):
@@ -49,7 +55,7 @@ def test_cholesky(array):
     ia = inp.array(a)
     result = inp.linalg.cholesky(ia)
     expected = numpy.linalg.cholesky(a)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("arr",
@@ -63,7 +69,7 @@ def test_cond(arr, p):
     ia = inp.array(a)
     result = inp.linalg.cond(ia, p)
     expected = numpy.linalg.cond(a, p)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("array",
@@ -82,13 +88,11 @@ def test_det(array):
     ia = inp.array(a)
     result = inp.linalg.det(ia)
     expected = numpy.linalg.det(a)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("size",
                          [2, 4, 8, 16, 300])
 def test_eig_arange(type, size):
@@ -115,21 +119,19 @@ def test_eig_arange(type, size):
         if np_vec[0, i] * dpnp_vec[0, i] < 0:
             np_vec[:, i] = -np_vec[:, i]
 
-    numpy.testing.assert_array_equal(symm_orig, symm)
-    numpy.testing.assert_array_equal(dpnp_symm_orig, dpnp_symm)
+    assert_array_equal(symm_orig, symm)
+    assert_array_equal(dpnp_symm_orig, dpnp_symm)
 
     assert (dpnp_val.dtype == np_val.dtype)
     assert (dpnp_vec.dtype == np_vec.dtype)
     assert (dpnp_val.shape == np_val.shape)
     assert (dpnp_vec.shape == np_vec.shape)
 
-    numpy.testing.assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05)
-    numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05)
+    assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05)
+    assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_eigvals(type):
     if dpctl.get_current_device_type() != dpctl.device_type.gpu:
         pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005")
@@ -144,12 +146,10 @@ def test_eigvals(type):
         ia = inp.array(a)
         result = inp.linalg.eigvals(ia)
         expected = numpy.linalg.eigvals(a)
-        numpy.testing.assert_allclose(expected, result, atol=0.5)
+        assert_allclose(expected, result, atol=0.5)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("array",
                          [[[1., 2.], [3., 4.]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]],
                          ids=['[[1., 2.], [3., 4.]]', '[[0, 1, 2], [3, 2, -1], [4, -2, 3]]'])
@@ -158,12 +158,10 @@ def test_inv(type, array):
     ia = inp.array(a)
     result = inp.linalg.inv(ia)
     expected = numpy.linalg.inv(a)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
 @pytest.mark.parametrize("array",
                          [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]],
                          ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]'])
@@ -177,10 +175,11 @@ def test_matrix_rank(type, tol, array):
     result = inp.linalg.matrix_rank(ia, tol=tol)
     expected = numpy.linalg.matrix_rank(a, tol=tol)
 
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.usefixtures("suppress_divide_numpy_warnings")
 @pytest.mark.parametrize("array",
                          [[7], [1, 2], [1, 0]],
                          ids=['[7]', '[1, 2]', '[1, 0]'])
@@ -195,7 +194,7 @@ def test_norm1(array, ord, axis):
     ia = inp.array(a)
     result = inp.linalg.norm(ia, ord=ord, axis=axis)
     expected = numpy.linalg.norm(a, ord=ord, axis=axis)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -213,7 +212,7 @@ def test_norm2(array, ord, axis):
     ia = inp.array(a)
     result = inp.linalg.norm(ia, ord=ord, axis=axis)
     expected = numpy.linalg.norm(a, ord=ord, axis=axis)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -231,13 +230,11 @@ def test_norm3(array, ord, axis):
     ia = inp.array(a)
     result = inp.linalg.norm(ia, ord=ord, axis=axis)
     expected = numpy.linalg.norm(a, ord=ord, axis=axis)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(2, 2), (3, 4), (5, 3), (16, 16)],
                          ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)'])
@@ -262,7 +259,7 @@ def test_qr(type, shape, mode):
         tol = 1e-11
 
     # check decomposition
-    numpy.testing.assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol)
+    assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol)
 
     # NP change sign for comparison
     ncols = min(a.shape[0], a.shape[1])
@@ -273,15 +270,12 @@ def test_qr(type, shape, mode):
             np_r[i, :] = -np_r[i, :]
 
         if numpy.any(numpy.abs(np_r[i, :]) > tol):
-            numpy.testing.assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol)
+            assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol)
 
-    numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol)
+    assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(2, 2), (3, 4), (5, 3), (16, 16)],
                          ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)'])
@@ -310,10 +304,10 @@ def test_svd(type, shape):
         dpnp_diag_s[i, i] = dpnp_s[i]
 
     # check decomposition
-    numpy.testing.assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol)
+    assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol)
 
     # compare singular values
-    # numpy.testing.assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol)
+    # assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol)
 
     # change sign of vectors
     for i in range(min(shape[0], shape[1])):
@@ -323,5 +317,5 @@ def test_svd(type, shape):
 
     # compare vectors for non-zero values
     for i in range(numpy.count_nonzero(np_s > tol)):
-        numpy.testing.assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol)
-        numpy.testing.assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol)
+        assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol)
+        assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol)
diff --git a/tests/test_logic.py b/tests/test_logic.py
index b3280be07618..425106fd2efe 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -1,13 +1,16 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_equal
+)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(0,), (4,), (2, 3), (2, 2, 2)],
                          ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)'])
@@ -31,16 +34,14 @@ def test_all(type, shape):
 
         np_res = numpy.all(a)
         dpnp_res = dpnp.all(ia)
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
 
         np_res = a.all()
         dpnp_res = ia.all()
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_allclose(type):
 
     a = numpy.random.rand(10)
@@ -51,7 +52,7 @@ def test_allclose(type):
 
     np_res = numpy.allclose(a, b)
     dpnp_res = dpnp.allclose(dpnp_a, dpnp_b)
-    numpy.testing.assert_allclose(dpnp_res, np_res)
+    assert_allclose(dpnp_res, np_res)
 
     a[0] = numpy.inf
 
@@ -59,12 +60,10 @@ def test_allclose(type):
 
     np_res = numpy.allclose(a, b)
     dpnp_res = dpnp.allclose(dpnp_a, dpnp_b)
-    numpy.testing.assert_allclose(dpnp_res, np_res)
+    assert_allclose(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(0,), (4,), (2, 3), (2, 2, 2)],
                          ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)'])
@@ -88,58 +87,156 @@ def test_any(type, shape):
 
         np_res = numpy.any(a)
         dpnp_res = dpnp.any(ia)
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
 
         np_res = a.any()
         dpnp_res = ia.any()
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
+
+
+def test_equal():
+    """Compare `==` between a dpnp array and the scalars 0..7 with the numpy result."""
+    np_arr = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
+    dp_arr = dpnp.array(np_arr)
+    for scalar in range(len(np_arr)):
+        np_mask = (np_arr == scalar)
+        assert_equal(dp_arr == scalar, np_mask)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_greater():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a > i)
         dpnp_res = (ia > i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_greater_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a >= i)
         dpnp_res = (ia >= i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_less():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a < i)
         dpnp_res = (ia < i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_less_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a <= i)
         dpnp_res = (ia <= i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_not_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a)):
         np_res = (a != i)
         dpnp_res = (ia != i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+@pytest.mark.parametrize("op",
+                        ['logical_and', 'logical_or', 'logical_xor'],
+                        ids=['logical_and', 'logical_or', 'logical_xor'])
+def test_logic_comparison(op, dtype):
+    a = numpy.array([0, 0, 3, 2], dtype=dtype)  # together a and b cover all four zero/non-zero pairings
+    b = numpy.array([0, 4, 0, 2], dtype=dtype)
+
+    # x1 OP x2: the dpnp result must equal the numpy result
+    np_res = getattr(numpy, op)(a, b)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b))
+    assert_equal(dpnp_res, np_res)
+
+    # x2 OP x1: same check with the operands swapped
+    np_res = getattr(numpy, op)(b, a)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a))
+    assert_equal(dpnp_res, np_res)
+
+    # numpy.tile(x1, (10,)) OP numpy.tile(x2, (10,)): both operands repeated 10 times
+    a, b = numpy.tile(a, (10,)), numpy.tile(b, (10,))
+    np_res = getattr(numpy, op)(a, b)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b))
+    assert_equal(dpnp_res, np_res)
+
+    # x2 OP x1 after tiling the already tiled operands again with reps (10, 1)
+    a, b = numpy.tile(a, (10, 1)), numpy.tile(b, (10, 1))
+    np_res = getattr(numpy, op)(b, a)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a))
+    assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+def test_logical_not(dtype):
+    """Check dpnp.logical_not against numpy.logical_not on the same data."""
+    dp_input = dpnp.array([0, 4, 0, 2], dtype=dtype)
+    expected = numpy.logical_not(dp_input.asnumpy())
+    actual = dpnp.logical_not(dp_input)
+    assert_equal(actual, expected)
+
+
+@pytest.mark.parametrize("op",
+                         ['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                          'logical_and', 'logical_or', 'logical_xor', 'not_equal'],
+                         ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                              'logical_and', 'logical_or', 'logical_xor', 'not_equal'])
+@pytest.mark.parametrize("x1",
+                         [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]],
+                         ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]'])
+@pytest.mark.parametrize("x2",
+                         [5, [1, 2, 5, 6]],
+                         ids=['5', '[1, 2, 5, 6]'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+def test_elemwise_comparison(op, x1, x2, dtype):
+    create_func = lambda xp, a: xp.asarray(a, dtype=dtype) if not numpy.isscalar(a) else numpy.dtype(dtype=dtype).type(a)
+    # create_func: non-scalars become xp arrays of `dtype`; scalars become a numpy scalar of `dtype`
+    np_x1, np_x2 = create_func(numpy, x1), create_func(numpy, x2)
+    dp_x1, dp_x2 = create_func(dpnp, np_x1), create_func(dpnp, np_x2)  # dpnp operands are built from the numpy ones
+
+    # x1 OP x2: the dpnp result must equal the numpy result
+    np_res = getattr(numpy, op)(np_x1, np_x2)
+    dpnp_res = getattr(dpnp, op)(dp_x1, dp_x2)
+    assert_equal(dpnp_res, np_res)
+
+    # x2 OP x1: same check with the operands swapped
+    np_res = getattr(numpy, op)(np_x2, np_x1)
+    dpnp_res = getattr(dpnp, op)(dp_x2, dp_x1)
+    assert_equal(dpnp_res, np_res)
+
+    # x1[::-1] OP x2: first operand reversed along its first axis
+    np_res = getattr(numpy, op)(np_x1[::-1], np_x2)
+    dpnp_res = getattr(dpnp, op)(dp_x1[::-1], dp_x2)
+    assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("op",
+                         ['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                          'logical_and', 'logical_or', 'logical_xor', 'not_equal'],
+                         ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                              'logical_and', 'logical_or', 'logical_xor', 'not_equal'])
+@pytest.mark.parametrize("sh1",
+                         [[10], [8, 4], [4, 1, 2]],
+                         ids=['(10,)', '(8, 4)', '(4, 1, 2)'])
+@pytest.mark.parametrize("sh2",
+                         [[12], [4, 8], [1, 8, 6]],
+                         ids=['(12,)', '(4, 8)', '(1, 8, 6)'])
+def test_comparison_no_broadcast_with_shapes(op, sh1, sh2):
+    """dpnp and numpy must each raise ValueError for operands whose shapes cannot be broadcast."""
+    x1, x2 = dpnp.random.randn(*sh1), dpnp.random.randn(*sh2)
+    with pytest.raises(ValueError):
+        getattr(dpnp, op)(x1, x2)
+    with pytest.raises(ValueError):
+        getattr(numpy, op)(x1.asnumpy(), x2.asnumpy())
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 21071bec41e9..78f628908337 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -1,8 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_almost_equal,
+    assert_array_equal,
+    assert_raises
+)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -10,27 +17,27 @@ class TestConvolve:
     def test_object(self):
         d = [1.] * 100
         k = [1.] * 3
-        numpy.testing.assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3))
+        assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3))
 
     def test_no_overwrite(self):
         d = dpnp.ones(100)
         k = dpnp.ones(3)
         dpnp.convolve(d, k)
-        numpy.testing.assert_array_equal(d, dpnp.ones(100))
-        numpy.testing.assert_array_equal(k, dpnp.ones(3))
+        assert_array_equal(d, dpnp.ones(100))
+        assert_array_equal(k, dpnp.ones(3))
 
     def test_mode(self):
         d = dpnp.ones(100)
         k = dpnp.ones(3)
         default_mode = dpnp.convolve(d, k, mode='full')
         full_mode = dpnp.convolve(d, k, mode='f')
-        numpy.testing.assert_array_equal(full_mode, default_mode)
+        assert_array_equal(full_mode, default_mode)
         # integer mode
-        with numpy.testing.assert_raises(ValueError):
+        with assert_raises(ValueError):
             dpnp.convolve(d, k, mode=-1)
-        numpy.testing.assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode)
+        assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode)
         # illegal arguments
-        with numpy.testing.assert_raises(TypeError):
+        with assert_raises(TypeError):
             dpnp.convolve(d, k, mode=None)
 
 
@@ -53,33 +60,39 @@ def test_diff(array):
     dpnp_a = dpnp.array(array)
     expected = numpy.diff(np_a)
     result = dpnp.diff(dpnp_a)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
-@pytest.mark.parametrize("dtype1",
-                         [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128],
-                         ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128'])
-@pytest.mark.parametrize("dtype2",
-                         [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128],
-                         ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128'])
+@pytest.mark.parametrize("dtype1", get_all_dtypes())
+@pytest.mark.parametrize("dtype2", get_all_dtypes())
+@pytest.mark.parametrize("func", ['add', 'multiply', 'subtract', 'divide'])
 @pytest.mark.parametrize("data",
                          [[[1, 2], [3, 4]]],
                          ids=['[[1, 2], [3, 4]]'])
-def test_multiply_dtype(dtype1, dtype2, data):
+def test_op_multiple_dtypes(dtype1, func, dtype2, data):
+    """Apply `func` to two arrays built from `data` with each dtype pair and compare dpnp against numpy."""
     np_a = numpy.array(data, dtype=dtype1)
     dpnp_a = dpnp.array(data, dtype=dtype1)
 
     np_b = numpy.array(data, dtype=dtype2)
     dpnp_b = dpnp.array(data, dtype=dtype2)
 
-    result = dpnp.multiply(dpnp_a, dpnp_b)
-    expected = numpy.multiply(np_a, np_b)
-    numpy.testing.assert_array_equal(result, expected)
+    if func == 'subtract' and (dtype1 == dtype2 == dpnp.bool):
+        # one check per library: statements after the first raise in a shared `raises` block never run
+        assert_raises(TypeError, getattr(dpnp, func), dpnp_a, dpnp_b)
+        assert_raises(TypeError, getattr(numpy, func), np_a, np_b)
+    else:
+        result = getattr(dpnp, func)(dpnp_a, dpnp_b)
+        expected = getattr(numpy, func)(np_a, np_b)
+        assert_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3])
-@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9], 5, 0.5])
-@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64, numpy.float32, numpy.float64])
+@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9]])
+# TODO: bring every mathematical operation to the same level of dtype support, i.e.
+# @pytest.mark.parametrize("dtype", get_all_dtypes())
+# and then drop the fallback to numpy that is permitted by the fixture below:
+# @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestMathematical:
 
     @staticmethod
@@ -90,74 +103,90 @@ def array_or_scalar(xp, data, dtype=None):
         return xp.array(data, dtype=dtype)
 
     def _test_mathematical(self, name, dtype, lhs, rhs):
-        a = self.array_or_scalar(dpnp, lhs, dtype=dtype)
-        b = self.array_or_scalar(dpnp, rhs, dtype=dtype)
-        result = getattr(dpnp, name)(a, b)
-
-        a = self.array_or_scalar(numpy, lhs, dtype=dtype)
-        b = self.array_or_scalar(numpy, rhs, dtype=dtype)
-        expected = getattr(numpy, name)(a, b)
-
-        numpy.testing.assert_allclose(result, expected, atol=1e-4)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+        """Run the binary function `name` on `lhs` and `rhs` in both dpnp and numpy and compare the outcomes."""
+        a_dpnp = self.array_or_scalar(dpnp, lhs, dtype=dtype)
+        b_dpnp = self.array_or_scalar(dpnp, rhs, dtype=dtype)
+        a_np = self.array_or_scalar(numpy, lhs, dtype=dtype)
+        b_np = self.array_or_scalar(numpy, rhs, dtype=dtype)
+
+        if name == 'subtract' and not numpy.isscalar(rhs) and dtype == dpnp.bool:
+            # one check per library: statements after the first raise in a shared `raises` block never run
+            assert_raises(TypeError, getattr(dpnp, name), a_dpnp, b_dpnp)
+            assert_raises(TypeError, getattr(numpy, name), a_np, b_np)
+        else:
+            result = getattr(dpnp, name)(a_dpnp, b_dpnp)
+            expected = getattr(numpy, name)(a_np, b_np)
+            assert_allclose(result, expected, atol=1e-4)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_add(self, dtype, lhs, rhs):
         self._test_mathematical('add', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_arctan2(self, dtype, lhs, rhs):
         self._test_mathematical('arctan2', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_copysign(self, dtype, lhs, rhs):
         self._test_mathematical('copysign', dtype, lhs, rhs)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_divide(self, dtype, lhs, rhs):
         self._test_mathematical('divide', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_fmod(self, dtype, lhs, rhs):
         self._test_mathematical('fmod', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_floor_divide(self, dtype, lhs, rhs):
         self._test_mathematical('floor_divide', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_hypot(self, dtype, lhs, rhs):
         self._test_mathematical('hypot', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_maximum(self, dtype, lhs, rhs):
         self._test_mathematical('maximum', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_minimum(self, dtype, lhs, rhs):
         self._test_mathematical('minimum', dtype, lhs, rhs)
 
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_multiply(self, dtype, lhs, rhs):
         self._test_mathematical('multiply', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_remainder(self, dtype, lhs, rhs):
         self._test_mathematical('remainder', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_power(self, dtype, lhs, rhs):
         self._test_mathematical('power', dtype, lhs, rhs)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_subtract(self, dtype, lhs, rhs):
         self._test_mathematical('subtract', dtype, lhs, rhs)
 
 
+@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings")
 @pytest.mark.parametrize("val_type",
                          [bool, int, float],
                          ids=['bool', 'int', 'float'])
-@pytest.mark.parametrize("data_type",
-                         [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
+@pytest.mark.parametrize("data_type", get_all_dtypes())
+@pytest.mark.parametrize("func",
+                         ['add', 'multiply', 'subtract', 'divide'])
 @pytest.mark.parametrize("val",
                          [0, 1, 5],
                          ids=['0', '1', '5'])
@@ -172,33 +201,78 @@ def test_subtract(self, dtype, lhs, rhs):
                               '[[1, 2], [3, 4]]',
                               '[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]',
                               '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]'])
-def test_multiply_scalar(array, val, data_type, val_type):
+def test_op_with_scalar(array, val, func, data_type, val_type):
+    """Apply `func` to an array and a Python scalar, in both operand orders, and compare dpnp against numpy."""
     np_a = numpy.array(array, dtype=data_type)
     dpnp_a = dpnp.array(array, dtype=data_type)
     val_ = val_type(val)
 
-    result = dpnp.multiply(dpnp_a, val_)
-    expected = numpy.multiply(np_a, val_)
-    numpy.testing.assert_array_equal(result, expected)
+    if func == 'subtract' and val_type == bool and data_type == dpnp.bool:
+        # one check per call: statements after the first raise in a shared `raises` block never run
+        assert_raises(TypeError, getattr(dpnp, func), dpnp_a, val_)
+        assert_raises(TypeError, getattr(numpy, func), np_a, val_)
+        assert_raises(TypeError, getattr(dpnp, func), val_, dpnp_a)
+        assert_raises(TypeError, getattr(numpy, func), val_, np_a)
+    else:
+        result = getattr(dpnp, func)(dpnp_a, val_)
+        expected = getattr(numpy, func)(np_a, val_)
+        assert_allclose(result, expected)
+
+        result = getattr(dpnp, func)(val_, dpnp_a)
+        expected = getattr(numpy, func)(val_, np_a)
+        assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize("shape", [(), (3, 2)], ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_multiply_scalar(shape, dtype):
+    """Multiply an all-ones array by a Python float on the left and on the right; dpnp must agree with numpy."""
+    left, right = 0.5, 1.7
+    ones_np = numpy.ones(shape, dtype=dtype)
+    ones_dpnp = dpnp.ones(shape, dtype=dtype)
+
+    actual = left * ones_dpnp * right
+    desired = left * ones_np * right
+    assert_allclose(actual, desired)
+
 
-    result = dpnp.multiply(val_, dpnp_a)
-    expected = numpy.multiply(val_, np_a)
-    numpy.testing.assert_array_equal(result, expected)
+@pytest.mark.parametrize("shape", [(), (3, 2)], ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_add_scalar(shape, dtype):
+    """Add a Python float on the left and on the right of an all-ones array; dpnp must agree with numpy."""
+    host_ones = numpy.ones(shape, dtype=dtype)
+    device_ones = dpnp.ones(shape, dtype=dtype)
+
+    # the same scalar expression is evaluated by each library
+    got = 0.5 + device_ones + 1.7
+    want = 0.5 + host_ones + 1.7
+    assert_allclose(got, want)
 
 
 @pytest.mark.parametrize("shape",
                          [(), (3, 2)],
                          ids=['()', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float32, numpy.float64],
-                         ids=['numpy.float32', 'numpy.float64'])
-def test_multiply_scalar2(shape, dtype):
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_subtract_scalar(shape, dtype):
+    """Subtract with a Python float on the left and on the right of an all-ones array; dpnp must agree with numpy."""
     np_a = numpy.ones(shape, dtype=dtype)
     dpnp_a = dpnp.ones(shape, dtype=dtype)
 
-    result = 0.5 * dpnp_a
-    expected = 0.5 * np_a
-    numpy.testing.assert_array_equal(result, expected)
+    # the same scalar expression is evaluated by each library
+    assert_allclose(0.5 - dpnp_a - 1.7, 0.5 - np_a - 1.7)
+
+
+@pytest.mark.parametrize("shape", [(), (3, 2)], ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_divide_scalar(shape, dtype):
+    """Divide a Python float by an all-ones array, then by another float; dpnp must agree with numpy."""
+    numerator, divisor = 0.5, 1.7
+    np_ones = numpy.ones(shape, dtype=dtype)
+    dpnp_ones = dpnp.ones(shape, dtype=dtype)
+
+    quotient = numerator / dpnp_ones / divisor
+    reference = numerator / np_ones / divisor
+    assert_allclose(quotient, reference)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -211,7 +285,7 @@ def test_nancumprod(array):
 
     result = dpnp.nancumprod(dpnp_a)
     expected = numpy.nancumprod(np_a)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -224,31 +298,25 @@ def test_nancumsum(array):
 
     result = dpnp.nancumsum(dpnp_a)
     expected = numpy.nancumsum(np_a)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("data",
                          [[[1., -1.], [0.1, -0.1]], [-2, -1, 0, 1, 2]],
                          ids=['[[1., -1.], [0.1, -0.1]]', '[-2, -1, 0, 1, 2]'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_negative(data, dtype):
     np_a = numpy.array(data, dtype=dtype)
     dpnp_a = dpnp.array(data, dtype=dtype)
 
     result = dpnp.negative(dpnp_a)
     expected = numpy.negative(np_a)
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("val_type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
-@pytest.mark.parametrize("data_type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
+@pytest.mark.parametrize("val_type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+@pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("val",
                          [0, 1, 5],
                          ids=['0', '1', '5'])
@@ -269,12 +337,11 @@ def test_power(array, val, data_type, val_type):
     val_ = val_type(val)
     result = dpnp.power(dpnp_a, val_)
     expected = numpy.power(np_a, val_)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 class TestEdiff1d:
-    @pytest.mark.parametrize("data_type",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
+    @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
     @pytest.mark.parametrize("array", [[1, 2, 4, 7, 0],
                                        [],
                                        [1],
@@ -285,7 +352,7 @@ def test_ediff1d_int(self, array, data_type):
 
         result = dpnp.ediff1d(dpnp_a)
         expected = numpy.ediff1d(np_a)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -297,13 +364,12 @@ def test_ediff1d_args(self):
 
         result = dpnp.ediff1d(np_a, to_end=to_end, to_begin=to_begin)
         expected = numpy.ediff1d(np_a, to_end=to_end, to_begin=to_begin)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestTrapz:
-    @pytest.mark.parametrize("data_type",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
+    @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
     @pytest.mark.parametrize("array", [[1, 2, 3],
                                        [[1, 2, 3], [4, 5, 6]],
                                        [1, 4, 6, 9, 10, 12],
@@ -315,12 +381,10 @@ def test_trapz_default(self, array, data_type):
 
         result = dpnp.trapz(dpnp_a)
         expected = numpy.trapz(np_a)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("data_type_y",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
-    @pytest.mark.parametrize("data_type_x",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
+    @pytest.mark.parametrize("data_type_y", get_all_dtypes(no_bool=True, no_complex=True))
+    @pytest.mark.parametrize("data_type_x", get_all_dtypes(no_bool=True, no_complex=True))
     @pytest.mark.parametrize("y_array", [[1, 2, 4, 5],
                                          [1., 2.5, 6., 7.]])
     @pytest.mark.parametrize("x_array", [[2, 5, 6, 9]])
@@ -333,7 +397,7 @@ def test_trapz_with_x_params(self, y_array, x_array, data_type_y, data_type_x):
 
         result = dpnp.trapz(dpnp_y, dpnp_x)
         expected = numpy.trapz(np_y, np_x)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("array", [[1, 2, 3], [4, 5, 6]])
     def test_trapz_with_x_param_2ndim(self, array):
@@ -342,7 +406,7 @@ def test_trapz_with_x_param_2ndim(self, array):
 
         result = dpnp.trapz(dpnp_a, dpnp_a)
         expected = numpy.trapz(np_a, np_a)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("y_array", [[1, 2, 4, 5],
                                          [1., 2.5, 6., 7., ]])
@@ -353,7 +417,7 @@ def test_trapz_with_dx_params(self, y_array, dx):
 
         result = dpnp.trapz(dpnp_y, dx=dx)
         expected = numpy.trapz(np_y, dx=dx)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -388,10 +452,9 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis):
 
         result = dpnp.cross(dpnp_x1, dpnp_x2, axisa, axisb, axisc, axis)
         expected = numpy.cross(np_x1, np_x2, axisa, axisb, axisc, axis)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestGradient:
 
     @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
@@ -403,8 +466,9 @@ def test_gradient_y1(self, array):
 
         result = dpnp.gradient(dpnp_y)
         expected = numpy.gradient(np_y)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
                                        [3., 4., 7.5, 9.],
                                        [2, 6, 8, 10]])
@@ -415,7 +479,7 @@ def test_gradient_y1_dx(self, array, dx):
 
         result = dpnp.gradient(dpnp_y, dx)
         expected = numpy.gradient(np_y, dx)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 class TestCeil:
@@ -433,7 +497,7 @@ def test_ceil(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.ceil(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -473,7 +537,7 @@ def test_floor(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.floor(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -513,7 +577,7 @@ def test_trunc(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.trunc(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -556,7 +620,7 @@ def test_power(self):
         np_array2 = numpy.array(array2_data, dtype=numpy.float64)
         expected = numpy.power(np_array1, np_array2, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
diff --git a/tests/test_random.py b/tests/test_random.py
index 54cb2fa3a4d7..bc3501f4d20b 100644
--- a/tests/test_random.py
+++ b/tests/test_random.py
@@ -75,7 +75,6 @@ def test_input_shape(func):
     assert shape == res.shape
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("func",
                          [dpnp.random.random,
                           dpnp.random.random_sample,
diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index b93f52411c5d..0d1752c744ee 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -34,8 +34,8 @@ def get_default_floating():
 
 class TestNormal:
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float32, dpnp.float64, None],
-                             ids=['float32', 'float64', 'None'])
+                             [dpnp.float32, dpnp.float64, dpnp.float, None],
+                             ids=['float32', 'float64', 'float', 'None'])
     @pytest.mark.parametrize("usm_type",
                              ["host", "device", "shared"],
                              ids=['host', 'device', 'shared'])
@@ -173,9 +173,9 @@ def test_fallback(self, loc, scale):
 
 
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int,
+                             [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int,
                               dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
-                             ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int',
+                             ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int',
                                   'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
     def test_invalid_dtype(self, dtype):
         # dtype must be float32 or float64
@@ -257,8 +257,8 @@ def test_wrong_dims(self):
 
 class TestRandInt:
     @pytest.mark.parametrize("dtype",
-                             [int, dpnp.int32, dpnp.int],
-                             ids=['int', 'dpnp.int32', 'dpnp.int'])
+                             [int, dpnp.int32, dpnp.int, dpnp.integer],
+                             ids=['int', 'dpnp.int32', 'dpnp.int', 'dpnp.integer'])
     @pytest.mark.parametrize("usm_type",
                              ["host", "device", "shared"],
                              ids=['host', 'device', 'shared'])
@@ -267,6 +267,9 @@ def test_distr(self, dtype, usm_type):
         low = 1
         high = 10
 
+        if dtype in (dpnp.int, dpnp.integer) and dtype != dpnp.dtype('int32'):
+            pytest.skip("dtype isn't alias on dpnp.int32 on the target OS, so there will be a fallback")
+
         sycl_queue = dpctl.SyclQueue()
         data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low,
                                                                 high=high,
@@ -319,7 +322,6 @@ def test_negative_bounds(self):
         assert_array_equal(actual, desired)
 
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_negative_interval(self):
         rs = RandomState(3567)
 
@@ -421,16 +423,16 @@ def test_bounds_fallback(self, low, high):
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("dtype",
-                             [dpnp.int64, dpnp.integer, dpnp.bool, dpnp.bool_, bool],
-                             ids=['dpnp.int64', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool'])
+                             [dpnp.int64, dpnp.int, dpnp.integer, dpnp.bool, dpnp.bool_, bool],
+                             ids=['dpnp.int64', 'dpnp.int', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool'])
     def test_dtype_fallback(self, dtype):
         seed = 157
         low = -3 if not dtype in {dpnp.bool_, bool} else 0
         high = 37 if not dtype in {dpnp.bool_, bool} else 2
         size = (3, 2, 5)
 
-        if dtype == dpnp.integer and dtype == dpnp.dtype('int32'):
-            pytest.skip("dpnp.integer is alias on dpnp.int32 on the target OS, so no fallback here")
+        if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'):
+            pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no fallback here")
 
         # dtype must be int or dpnp.int32, in other cases it will be a fallback to numpy
         actual = RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype).asnumpy()
@@ -714,8 +716,8 @@ class TestUniform:
                              [[1.23, 10.54], [10.54, 1.23]],
                              ids=['(low, high)=[1.23, 10.54]', '(low, high)=[10.54, 1.23]'])
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float32, dpnp.float64, dpnp.int32, None],
-                             ids=['float32', 'float64', 'int32', 'None'])
+                             [dpnp.float32, dpnp.float64, dpnp.float, dpnp.int32, None],
+                             ids=['float32', 'float64', 'float', 'int32', 'None'])
     @pytest.mark.parametrize("usm_type",
                              ["host", "device", "shared"],
                              ids=['host', 'device', 'shared'])
@@ -831,12 +833,15 @@ def test_fallback(self, low, high):
 
 
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int, int,
+                             [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int, int,
                               dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
-                             ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int',
+                             ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int',
                                   'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
     def test_invalid_dtype(self, dtype):
-        # dtype must be float32 or float64
+        if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'):
+            pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no error here")
+
+        # dtype must be int32, float32 or float64
         assert_raises(TypeError, RandomState().uniform, dtype=dtype)
 
 
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 7ec1d6b3f03f..02e8c8689757 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -1,8 +1,13 @@
 import math
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
+
 import numpy
+from numpy.testing import (
+    assert_allclose
+)
 
 
 def _getattr(ex, str_):
@@ -15,12 +20,10 @@ def _getattr(ex, str_):
 
 @pytest.mark.parametrize("func_name",
                          ['abs', ])
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
-def test_strides(func_name, type):
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strides(func_name, dtype):
     shape = (4, 4)
-    a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape)
+    a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
     a_strides = a[0::2, 0::2]
     dpa = dpnp.array(a)
     dpa_strides = dpa[0::2, 0::2]
@@ -31,17 +34,16 @@ def test_strides(func_name, type):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(a_strides)
 
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
+@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings")
 @pytest.mark.parametrize("func_name",
                          ["arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "cbrt", "ceil", "copy", "cos",
                           "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log",
                           "log10", "log1p", "log2", "negative", "radians", "sign", "sin", "sinh", "sqrt", "square",
                           "tanh", "trunc"])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -58,12 +60,10 @@ def test_strides_1arg(func_name, dtype, shape):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -80,12 +80,10 @@ def test_strides_erf(dtype, shape):
     for idx, val in enumerate(b):
         expected[idx] = math.erf(val)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -101,12 +99,10 @@ def test_strides_reciprocal(dtype, shape):
     result = dpnp.reciprocal(dpb)
     expected = numpy.reciprocal(b)
 
-    numpy.testing.assert_allclose(result, expected, rtol=1e-06)
+    assert_allclose(result, expected, rtol=1e-06)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -120,14 +116,12 @@ def test_strides_tan(dtype, shape):
     result = dpnp.tan(dpb)
     expected = numpy.tan(b)
 
-    numpy.testing.assert_allclose(result, expected, rtol=1e-06)
+    assert_allclose(result, expected, rtol=1e-06)
 
 
 @pytest.mark.parametrize("func_name",
                          ["add", "arctan2", "hypot", "maximum", "minimum", "multiply", "power", "subtract"])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -144,7 +138,7 @@ def test_strides_2args(func_name, dtype, shape):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
 @pytest.mark.parametrize("func_name",
@@ -168,12 +162,10 @@ def test_strides_bitwise(func_name, dtype, shape):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -187,13 +179,10 @@ def test_strides_copysign(dtype, shape):
     result = dpnp.copysign(dpa, dpb)
     expected = numpy.copysign(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -207,13 +196,10 @@ def test_strides_fmod(dtype, shape):
     result = dpnp.fmod(dpa, dpb)
     expected = numpy.fmod(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -227,4 +213,4 @@ def test_strides_true_devide(dtype, shape):
     result = dpnp.fmod(dpa, dpb)
     expected = numpy.fmod(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 26a71eef2936..1bffa18111b8 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1,9 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 import dpctl
 import numpy
 
+from numpy.testing import (
+    assert_array_equal,
+    assert_raises
+)
+
 
 list_of_backend_str = [
     "host",
@@ -17,7 +23,7 @@
     "cpu",
 ]
 
-available_devices = [d for d in dpctl.get_devices() if not d.has_aspect_host]
+available_devices = [d for d in dpctl.get_devices() if not getattr(d, 'has_aspect_host', False)]
 
 valid_devices = []
 for device in available_devices:
@@ -76,27 +82,30 @@ def vvsort(val, vec, size, xp):
     "func, arg, kwargs",
     [
         pytest.param("arange",
-                     -25.7,
+                     [-25.7],
                      {'stop': 10**8, 'step': 15}),
         pytest.param("full",
-                     (2,2),
+                     [(2,2)],
                      {'fill_value': 5}),
+        pytest.param("eye",
+                     [4, 2],
+                     {}),
         pytest.param("ones",
-                     (2,2),
+                     [(2,2)],
                      {}),
         pytest.param("zeros",
-                     (2,2),
+                     [(2,2)],
                      {})
     ])
 @pytest.mark.parametrize("device",
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
 def test_array_creation(func, arg, kwargs, device):
-    numpy_array = getattr(numpy, func)(arg, **kwargs)
+    numpy_array = getattr(numpy, func)(*arg, **kwargs)
 
     dpnp_kwargs = dict(kwargs)
     dpnp_kwargs['device'] = device
-    dpnp_array = getattr(dpnp, func)(arg, **dpnp_kwargs)
+    dpnp_array = getattr(dpnp, func)(*arg, **dpnp_kwargs)
 
     numpy.testing.assert_array_equal(numpy_array, dpnp_array)
     assert dpnp_array.sycl_device == device
@@ -152,12 +161,22 @@ def test_array_creation_like(func, kwargs, device_x, device_y):
 
     dpnp_kwargs = dict(kwargs)
     dpnp_kwargs['device'] = device_y
-    
+
     y = getattr(dpnp, func)(x, **dpnp_kwargs)
     numpy.testing.assert_array_equal(y_orig, y)
     assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
+@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
+@pytest.mark.parametrize("device",
+                          valid_devices,
+                          ids=[device.filter_string for device in valid_devices])
+def test_tril_triu(func, device):
+    x0 = dpnp.ones((3,3), device=device)
+    x = getattr(dpnp, func)(x0)
+    assert_sycl_queue_equal(x.sycl_queue, x0.sycl_queue)
+
+
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize(
     "func,data",
@@ -335,36 +354,114 @@ def test_broadcasting(func, data1, data2, device):
     assert_sycl_queue_equal(result_queue, expected_queue)
 
 
+@pytest.mark.parametrize("func", ["add", "copysign", "divide", "floor_divide", "fmod",
+                                  "maximum", "minimum", "multiply", "outer", "power",
+                                  "remainder", "subtract"])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_2in_1out_diff_queue_but_equal_context(func, device):
+    x1 = dpnp.arange(10)
+    x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1]
+    with assert_raises(ValueError):
+        getattr(dpnp, func)(x1, x2)
+
+
+@pytest.mark.parametrize(
+    "func, kwargs",
+    [
+        pytest.param("normal",
+                     {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}),
+        pytest.param("rand",
+                     {'d0': 20}),
+        pytest.param("randint",
+                     {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}),
+        pytest.param("randn",
+                     {'d0': 20}),
+        pytest.param("random",
+                     {'size': (35, 45)}),
+        pytest.param("random_integers",
+                     {'low': -17, 'high': 3, 'size': (12, 16)}),
+        pytest.param("random_sample",
+                     {'size': (7, 7)}),
+        pytest.param("ranf",
+                     {'size': (10, 7, 12)}),
+        pytest.param("sample",
+                     {'size': (7, 9)}),
+        pytest.param("standard_normal",
+                     {'size': (4, 4, 8)}),
+        pytest.param("uniform",
+                     {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)})
+    ])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
 @pytest.mark.parametrize("usm_type",
                          ["host", "device", "shared"])
-@pytest.mark.parametrize("size",
-                         [None, (), 3, (2, 1), (4, 2, 5)],
-                         ids=['None', '()', '3', '(2,1)', '(4,2,5)'])
-def test_uniform(usm_type, size):
-    low = 1.0
-    high = 2.0
-    res = dpnp.random.uniform(low, high, size=size, usm_type=usm_type)
+def test_random(func, kwargs, device, usm_type):
+    kwargs = {**kwargs, 'device': device, 'usm_type': usm_type}
+
+    # test with the default SYCL queue for the device
+    res_array = getattr(dpnp.random, func)(**kwargs)
+    assert device == res_array.sycl_device
+    assert usm_type == res_array.usm_type
 
-    assert usm_type == res.usm_type
+    sycl_queue = dpctl.SyclQueue(device, property="in_order")
+    kwargs['device'] = None
+    kwargs['sycl_queue'] = sycl_queue
 
+    # test with an in-order SYCL queue created for the device and passed as an argument
+    res_array = getattr(dpnp.random, func)(**kwargs)
+    assert usm_type == res_array.usm_type
+    assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue)
 
+
+@pytest.mark.parametrize(
+    "func, args, kwargs",
+    [
+        pytest.param("normal",
+                     [],
+                     {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}),
+        pytest.param("rand",
+                     [15, 30, 5],
+                     {}),
+        pytest.param("randint",
+                     [],
+                     {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}),
+        pytest.param("randn",
+                     [20, 5, 40],
+                     {}),
+        pytest.param("random_sample",
+                     [],
+                     {'size': (7, 7)}),
+        pytest.param("standard_normal",
+                     [],
+                     {'size': (4, 4, 8)}),
+        pytest.param("uniform",
+                     [],
+                     {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)})
+    ])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
 @pytest.mark.parametrize("usm_type",
                          ["host", "device", "shared"])
-@pytest.mark.parametrize("seed",
-                         [None, (), 123, (12, 58), (147, 56, 896), [1, 654, 78]],
-                         ids=['None', '()', '123', '(12,58)', '(147,56,896)', '[1,654,78]'])
-def test_rs_uniform(usm_type, seed):
-    seed = 123
-    sycl_queue = dpctl.SyclQueue()
-    low = 1.0
-    high = 2.0
-    rs = dpnp.random.RandomState(seed, sycl_queue=sycl_queue)
-    res = rs.uniform(low, high, usm_type=usm_type)
+def test_random_state(func, args, kwargs, device, usm_type):
+    kwargs = {**kwargs, 'usm_type': usm_type}
 
-    assert usm_type == res.usm_type
+    # test with the default SYCL queue for the device
+    rs = dpnp.random.RandomState(seed=1234567, device=device)
+    res_array = getattr(rs, func)(*args, **kwargs)
+    assert device == res_array.sycl_device
+    assert usm_type == res_array.usm_type
 
-    res_sycl_queue = res.get_array().sycl_queue
-    assert_sycl_queue_equal(res_sycl_queue, sycl_queue)
+    sycl_queue = dpctl.SyclQueue(device, property="in_order")
+
+    # test with an in-order SYCL queue created for the device and passed as an argument
+    rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue)
+    res_array = getattr(rs, func)(*args, **kwargs)
+    assert usm_type == res_array.usm_type
+    assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -569,7 +666,7 @@ def test_eig(device):
     dpnp_val_queue = dpnp_val.get_array().sycl_queue
     dpnp_vec_queue = dpnp_vec.get_array().sycl_queue
 
-    # compare queue and device    
+    # compare queue and device
     assert_sycl_queue_equal(dpnp_val_queue, expected_queue)
     assert_sycl_queue_equal(dpnp_vec_queue, expected_queue)
 
@@ -655,7 +752,6 @@ def test_qr(device):
     assert_sycl_queue_equal(dpnp_r_queue, expected_queue)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("device",
                         valid_devices,
                         ids=[device.filter_string for device in valid_devices])
@@ -663,7 +759,7 @@ def test_svd(device):
     tol = 1e-12
     shape = (2,2)
     numpy_data = numpy.arange(shape[0] * shape[1]).reshape(shape)
-    dpnp_data = dpnp.arange(shape[0] * shape[1]).reshape(shape)
+    dpnp_data = dpnp.arange(shape[0] * shape[1], device=device).reshape(shape)
     np_u, np_s, np_vt = numpy.linalg.svd(numpy_data)
     dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data)
 
@@ -675,7 +771,7 @@ def test_svd(device):
     assert (dpnp_vt.shape == np_vt.shape)
 
     # check decomposition
-    dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype)
+    dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype, device=device)
     for i in range(dpnp_s.size):
         dpnp_diag_s[i, i] = dpnp_s[i]
 
@@ -739,3 +835,39 @@ def test_array_copy(device, func, device_param, queue_param):
     result = dpnp.array(dpnp_data, **kwargs)
 
     assert_sycl_queue_equal(result.sycl_queue, dpnp_data.sycl_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+# TODO: remove no_bool=True once a dlpack version > 0.7 is used
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True))
+@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)])
+def test_from_dlpack(arr_dtype, shape, device):
+    X = dpnp.empty(shape=shape, dtype=arr_dtype, device=device)
+    Y = dpnp.from_dlpack(X)
+    assert_array_equal(X, Y)
+    assert X.__dlpack_device__() == Y.__dlpack_device__()
+    assert X.sycl_device == Y.sycl_device
+    assert X.sycl_context == Y.sycl_context
+    assert X.usm_type == Y.usm_type
+    if Y.ndim:
+        V = Y[::-1]
+        W = dpnp.from_dlpack(V)
+        assert V.strides == W.strides
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+# TODO: remove no_bool=True once a dlpack version > 0.7 is used
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True))
+def test_from_dlpack_with_dpt(arr_dtype, device):
+    X = dpctl.tensor.empty((64,), dtype=arr_dtype, device=device)
+    Y = dpnp.from_dlpack(X)
+    assert_array_equal(X, Y)
+    assert isinstance(Y, dpnp.dpnp_array.dpnp_array)
+    assert X.__dlpack_device__() == Y.__dlpack_device__()
+    assert X.sycl_device == Y.sycl_device
+    assert X.sycl_context == Y.sycl_context
+    assert X.usm_type == Y.usm_type
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 094fe419c263..1a33a1d655dd 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -11,16 +11,17 @@
 ]
 
 
-@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
-def test_coerced_usm_types_sum(usm_type):
-    x = dp.arange(10, usm_type = "device")
-    y = dp.arange(10, usm_type = usm_type)
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_sum(usm_type_x, usm_type_y):
+    x = dp.arange(1000, usm_type = usm_type_x)
+    y = dp.arange(1000, usm_type = usm_type_y)
 
-    z = x + y
-    
-    assert z.usm_type == x.usm_type
-    assert z.usm_type == "device"
-    assert y.usm_type == usm_type
+    z = 1.3 + x + y + 2
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
@@ -29,8 +30,34 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y):
     x = dp.arange(10, usm_type = usm_type_x)
     y = dp.arange(10, usm_type = usm_type_y)
 
-    z = x * y
-    
+    z = 3 * x * y * 1.5
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_subtract(usm_type_x, usm_type_y):
+    x = dp.arange(50, usm_type = usm_type_x)
+    y = dp.arange(50, usm_type = usm_type_y)
+
+    z = 20 - x - y - 7.4
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_divide(usm_type_x, usm_type_y):
+    x = dp.arange(120, usm_type = usm_type_x)
+    y = dp.arange(120, usm_type = usm_type_y)
+
+    z = 2 / x / y / 1.5
+
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
@@ -61,3 +88,32 @@ def test_array_creation(func, args, usm_type_x, usm_type_y):
 
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y
+
+
+@pytest.mark.skip()
+@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_tril_triu(func, usm_type):
+    x0 = dp.ones((3,3), usm_type=usm_type)
+    x = getattr(dp, func)(x0)
+    assert x.usm_type == usm_type
+
+
+@pytest.mark.parametrize("op",
+                         ['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                          'logical_and', 'logical_or', 'logical_xor', 'not_equal'],
+                         ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                              'logical_and', 'logical_or', 'logical_xor', 'not_equal'])
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y):
+    x = dp.arange(100, usm_type = usm_type_x)
+    y = dp.arange(100, usm_type = usm_type_y)[::-1]
+
+    z = getattr(dp, op)(x, y)
+    zx = getattr(dp, op)(x, 50)
+    zy = getattr(dp, op)(30, y)
+
+    assert x.usm_type == zx.usm_type == usm_type_x
+    assert y.usm_type == zy.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
diff --git a/tests/third_party/cupy/creation_tests/test_basic.py b/tests/third_party/cupy/creation_tests/test_basic.py
index 337718d3caf3..1adcf98f969b 100644
--- a/tests/third_party/cupy/creation_tests/test_basic.py
+++ b/tests/third_party/cupy/creation_tests/test_basic.py
@@ -164,7 +164,7 @@ def test_empty_zero_sized_array_strides(self, order):
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_eye(self, xp, dtype):
-        return xp.eye(5, 4, 1, dtype)
+        return xp.eye(5, 4, k=1, dtype=dtype)
 
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/tests/third_party/cupy/creation_tests/test_from_data.py b/tests/third_party/cupy/creation_tests/test_from_data.py
index e07d927b1cf0..ce71ef311a56 100644
--- a/tests/third_party/cupy/creation_tests/test_from_data.py
+++ b/tests/third_party/cupy/creation_tests/test_from_data.py
@@ -454,6 +454,7 @@ def test_asfortranarray_cuda_array_zero_dim_dtype(
         a = xp.ones((), dtype=dtype_a)
         return xp.asfortranarray(a, dtype=dtype_b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_fromfile(self, xp):
         with tempfile.TemporaryFile() as fh:
diff --git a/tests/third_party/cupy/creation_tests/test_matrix.py b/tests/third_party/cupy/creation_tests/test_matrix.py
index a5471f213ebf..fe144cbc58c4 100644
--- a/tests/third_party/cupy/creation_tests/test_matrix.py
+++ b/tests/third_party/cupy/creation_tests/test_matrix.py
@@ -140,6 +140,7 @@ def test_tri_posi(self, xp, dtype):
     {'shape': (2, 3, 4)},
 )
 @testing.gpu
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestTriLowerAndUpper(unittest.TestCase):
 
     @testing.for_all_dtypes(no_complex=True)
@@ -148,7 +149,6 @@ def test_tril(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
         return xp.tril(m)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_tril_array_like(self, xp):
         return xp.tril([[1, 2], [3, 4]])
@@ -157,13 +157,13 @@ def test_tril_array_like(self, xp):
     @testing.numpy_cupy_array_equal()
     def test_tril_nega(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.tril(m, -1)
+        return xp.tril(m, k=-1)
 
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_array_equal()
     def test_tril_posi(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.tril(m, 1)
+        return xp.tril(m, k=1)
 
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_array_equal()
@@ -171,7 +171,6 @@ def test_triu(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
         return xp.triu(m)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_triu_array_like(self, xp):
         return xp.triu([[1, 2], [3, 4]])
@@ -180,10 +179,10 @@ def test_triu_array_like(self, xp):
     @testing.numpy_cupy_array_equal()
     def test_triu_nega(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.triu(m, -1)
+        return xp.triu(m, k=-1)
 
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_array_equal()
     def test_triu_posi(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.triu(m, 1)
+        return xp.triu(m, k=1)
diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py
index 75960e492c17..4d5bc03f81b0 100644
--- a/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -54,7 +54,7 @@ def test_arange8(self, xp, dtype):
 
     def test_arange9(self):
         for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
+            with pytest.raises((ValueError, TypeError)):
                 xp.arange(10, dtype=xp.bool_)
 
     @testing.numpy_cupy_array_equal()
diff --git a/tests/third_party/cupy/indexing_tests/test_generate.py b/tests/third_party/cupy/indexing_tests/test_generate.py
index d10e503bcec8..2bb0404ab599 100644
--- a/tests/third_party/cupy/indexing_tests/test_generate.py
+++ b/tests/third_party/cupy/indexing_tests/test_generate.py
@@ -28,7 +28,7 @@ def test_indices_list2(self, xp, dtype):
 
     def test_indices_list3(self):
         for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
+            with pytest.raises((ValueError, TypeError)):
                 xp.indices((1, 2, 3, 4), dtype=xp.bool_)
 
 
diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py
index ed6a156e8848..fdcc5357e19e 100644
--- a/tests/third_party/cupy/indexing_tests/test_insert.py
+++ b/tests/third_party/cupy/indexing_tests/test_insert.py
@@ -42,7 +42,7 @@ class TestPlaceRaises(unittest.TestCase):
     def test_place_empty_value_error(self, dtype):
         for xp in (numpy, cupy):
             a = testing.shaped_arange(self.shape, xp, dtype)
-            mask = testing.shaped_arange(self.shape, xp, numpy.int) % 2 == 0
+            mask = testing.shaped_arange(self.shape, xp, numpy.int_) % 2 == 0
             vals = testing.shaped_random((0,), xp, dtype)
             with pytest.raises(ValueError):
                 xp.place(a, mask, vals)
diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py
index 0be9eaeee610..67848359188d 100644
--- a/tests/third_party/cupy/logic_tests/test_comparison.py
+++ b/tests/third_party/cupy/logic_tests/test_comparison.py
@@ -8,7 +8,6 @@
 from tests.third_party.cupy import testing
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestComparison(unittest.TestCase):
 
@@ -38,7 +37,6 @@ def test_equal(self):
         self.check_binary('equal')
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestComparisonOperator(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/logic_tests/test_ops.py b/tests/third_party/cupy/logic_tests/test_ops.py
index 55b8617882b1..cdbd035cd265 100644
--- a/tests/third_party/cupy/logic_tests/test_ops.py
+++ b/tests/third_party/cupy/logic_tests/test_ops.py
@@ -20,18 +20,14 @@ def check_binary(self, name, xp, dtype):
         b = testing.shaped_reverse_arange((2, 3), xp, dtype)
         return getattr(xp, name)(a, b)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_and(self):
         self.check_binary('logical_and')
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_or(self):
         self.check_binary('logical_or')
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_xor(self):
         self.check_binary('logical_xor')
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_not(self):
         self.check_unary('logical_not')
diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py
index 28771b4979b5..027722d8bef2 100644
--- a/tests/third_party/cupy/math_tests/test_arithmetic.py
+++ b/tests/third_party/cupy/math_tests/test_arithmetic.py
@@ -1,5 +1,6 @@
 import itertools
 import unittest
+import warnings
 
 import numpy
 import pytest
@@ -130,8 +131,8 @@ def check_binary(self, xp):
 
         func = getattr(xp, self.name)
         with testing.NumpyError(divide='ignore'):
-            with numpy.warnings.catch_warnings():
-                numpy.warnings.filterwarnings('ignore')
+            with warnings.catch_warnings():
+                warnings.filterwarnings('ignore')
                 if self.use_dtype:
                     y = func(arg1, arg2, dtype=self.dtype)
                 else:
@@ -145,17 +146,14 @@ def check_binary(self, xp):
                 y = y.astype(numpy.complex64)
 
         # NumPy returns an output array of another type than DPNP when input ones have diffrent types.
-        if self.name == 'multiply' and xp is cupy:
-            if xp.isscalar(arg1) and xp.isscalar(arg2):
-                # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar.
-                y = numpy.asarray(y)
-            elif dtype1 != dtype2:
-                is_array_arg1 = not xp.isscalar(arg1)
-                is_array_arg2 = not xp.isscalar(arg2)
+        if xp is cupy and dtype1 != dtype2 and not self.use_dtype:
+            is_array_arg1 = not xp.isscalar(arg1)
+            is_array_arg2 = not xp.isscalar(arg2)
 
-                is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating)
-                is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type)
+            is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating)
+            is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type)
 
+            if self.name in ('add', 'multiply', 'subtract'):
                 if is_array_arg1 and is_array_arg2:
                     # If both inputs are arrays where one is of floating type and another - integer,
                     # NumPy will return an output array of always "float64" type,
@@ -170,6 +168,13 @@ def check_binary(self, xp):
                         y = y.astype(dtype1)
                     elif is_array_arg2 and not is_array_arg1:
                         y = y.astype(dtype2)
+            elif self.name in ('divide', 'true_divide'):
+                # If one input is a float32 array and the other is an integer or floating scalar,
+                # NumPy returns a float32 output array, while DPNP returns a float64 array,
+                # since NumPy would also use float64 if the scalar were an array of integer or floating type.
+                if not (is_array_arg1 and is_array_arg2):
+                    if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32):
+                        y = y.astype(numpy.float32)
 
         # NumPy returns different values (nan/inf) on division by zero
         # depending on the architecture.
@@ -187,7 +192,6 @@ def check_binary(self, xp):
 @testing.gpu
 @testing.parameterize(*(
     testing.product({
-        # TODO(unno): boolean subtract causes DeprecationWarning in numpy>=1.13
         'arg1': [testing.shaped_arange((2, 3), numpy, dtype=d)
                  for d in all_types
                  ] + [0, 0.0, 2, 2.0],
@@ -282,7 +286,6 @@ def test_modf(self, xp, dtype):
     'xp': [numpy, cupy],
     'shape': [(3, 2), (), (3, 0, 2)]
 }))
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestBoolSubtract(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/random_tests/test_sample.py b/tests/third_party/cupy/random_tests/test_sample.py
index 3f8a0169ac12..f3b844cdc6a5 100644
--- a/tests/third_party/cupy/random_tests/test_sample.py
+++ b/tests/third_party/cupy/random_tests/test_sample.py
@@ -33,7 +33,6 @@ def test_lo_hi_nonrandom(self):
         a = random.randint(-1.1, -0.9, size=(2, 2))
         numpy.testing.assert_array_equal(a, cupy.full((2, 2), -1))
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_zero_sizes(self):
         a = random.randint(10, size=(0,))
         numpy.testing.assert_array_equal(a, cupy.array(()))
@@ -112,7 +111,6 @@ def test_goodness_of_fit_2(self):
         self.assertTrue(hypothesis.chi_square_test(counts, expected))
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRandintDtype(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py
index aea22d02c511..60d3413b0daa 100644
--- a/tests/third_party/cupy/statistics_tests/test_meanvar.py
+++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py
@@ -89,7 +89,6 @@ def test_median_axis_sequence(self, xp, dtype):
         return xp.median(a, self.axis, keepdims=self.keepdims)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestAverage(unittest.TestCase):
 
@@ -101,12 +100,14 @@ def test_average_all(self, xp, dtype):
         a = testing.shaped_arange((2, 3), xp, dtype)
         return xp.average(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_average_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return xp.average(a, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_average_weights(self, xp, dtype):
@@ -114,6 +115,7 @@ def test_average_weights(self, xp, dtype):
         w = testing.shaped_arange((2, 3), xp, dtype)
         return xp.average(a, weights=w)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_average_axis_weights(self, xp, dtype):
@@ -132,6 +134,7 @@ def check_returned(self, a, axis, weights):
         testing.assert_allclose(average_cpu, average_gpu)
         testing.assert_allclose(sum_weights_cpu, sum_weights_gpu)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     def test_returned(self, dtype):
         a = testing.shaped_arange((2, 3), numpy, dtype)
diff --git a/tests_external/skipped_tests_numpy.tbl b/tests_external/skipped_tests_numpy.tbl
index 30b66da5e663..c2c0dc78ec54 100644
--- a/tests_external/skipped_tests_numpy.tbl
+++ b/tests_external/skipped_tests_numpy.tbl
@@ -318,83 +318,6 @@ tests/test_datetime.py::TestDateTime::test_timedelta_np_int_construction[Y]
 tests/test_datetime.py::TestDateTime::test_timedelta_object_array_conversion
 tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction
 tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction_units
-tests/test_defchararray.py::TestBasic::test_from_object_array
-tests/test_defchararray.py::TestBasic::test_from_object_array_unicode
-tests/test_defchararray.py::TestBasic::test_from_string
-tests/test_defchararray.py::TestBasic::test_from_string_array
-tests/test_defchararray.py::TestBasic::test_from_unicode
-tests/test_defchararray.py::TestBasic::test_from_unicode_array
-tests/test_defchararray.py::TestBasic::test_unicode_upconvert
-tests/test_defchararray.py::TestChar::test_it
-tests/test_defchararray.py::TestComparisonsMixed1::test_equal
-tests/test_defchararray.py::TestComparisonsMixed1::test_greater
-tests/test_defchararray.py::TestComparisonsMixed1::test_greater_equal
-tests/test_defchararray.py::TestComparisonsMixed1::test_less
-tests/test_defchararray.py::TestComparisonsMixed1::test_less_equal
-tests/test_defchararray.py::TestComparisonsMixed1::test_not_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_greater
-tests/test_defchararray.py::TestComparisonsMixed2::test_greater_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_less
-tests/test_defchararray.py::TestComparisonsMixed2::test_less_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_not_equal
-tests/test_defchararray.py::TestComparisons::test_equal
-tests/test_defchararray.py::TestComparisons::test_greater
-tests/test_defchararray.py::TestComparisons::test_greater_equal
-tests/test_defchararray.py::TestComparisons::test_less
-tests/test_defchararray.py::TestComparisons::test_less_equal
-tests/test_defchararray.py::TestComparisons::test_not_equal
-tests/test_defchararray.py::test_empty_indexing
-tests/test_defchararray.py::TestInformation::test_count
-tests/test_defchararray.py::TestInformation::test_endswith
-tests/test_defchararray.py::TestInformation::test_find
-tests/test_defchararray.py::TestInformation::test_index
-tests/test_defchararray.py::TestInformation::test_isalnum
-tests/test_defchararray.py::TestInformation::test_isalpha
-tests/test_defchararray.py::TestInformation::test_isdigit
-tests/test_defchararray.py::TestInformation::test_islower
-tests/test_defchararray.py::TestInformation::test_isspace
-tests/test_defchararray.py::TestInformation::test_istitle
-tests/test_defchararray.py::TestInformation::test_isupper
-tests/test_defchararray.py::TestInformation::test_len
-tests/test_defchararray.py::TestInformation::test_rfind
-tests/test_defchararray.py::TestInformation::test_rindex
-tests/test_defchararray.py::TestInformation::test_startswith
-tests/test_defchararray.py::TestMethods::test_capitalize
-tests/test_defchararray.py::TestMethods::test_center
-tests/test_defchararray.py::TestMethods::test_decode
-tests/test_defchararray.py::TestMethods::test_encode
-tests/test_defchararray.py::TestMethods::test_expandtabs
-tests/test_defchararray.py::TestMethods::test_isdecimal
-tests/test_defchararray.py::TestMethods::test_isnumeric
-tests/test_defchararray.py::TestMethods::test_join
-tests/test_defchararray.py::TestMethods::test_ljust
-tests/test_defchararray.py::TestMethods::test_lower
-tests/test_defchararray.py::TestMethods::test_lstrip
-tests/test_defchararray.py::TestMethods::test_partition
-tests/test_defchararray.py::TestMethods::test_replace
-tests/test_defchararray.py::TestMethods::test_rjust
-tests/test_defchararray.py::TestMethods::test_rpartition
-tests/test_defchararray.py::TestMethods::test_rsplit
-tests/test_defchararray.py::TestMethods::test_rstrip
-tests/test_defchararray.py::TestMethods::test_split
-tests/test_defchararray.py::TestMethods::test_splitlines
-tests/test_defchararray.py::TestMethods::test_strip
-tests/test_defchararray.py::TestMethods::test_swapcase
-tests/test_defchararray.py::TestMethods::test_title
-tests/test_defchararray.py::TestMethods::test_upper
-tests/test_defchararray.py::TestOperations::test_add
-tests/test_defchararray.py::TestOperations::test_mod
-tests/test_defchararray.py::TestOperations::test_mul
-tests/test_defchararray.py::TestOperations::test_radd
-tests/test_defchararray.py::TestOperations::test_rmod
-tests/test_defchararray.py::TestOperations::test_rmul
-tests/test_defchararray.py::TestOperations::test_slice
-tests/test_defchararray.py::TestVecString::test_invalid_args_tuple
-tests/test_defchararray.py::TestVecString::test_invalid_function_args
-tests/test_defchararray.py::TestVecString::test_invalid_result_type
-tests/test_defchararray.py::TestVecString::test_non_string_array
-tests/test_defchararray.py::TestWhitespace::test1
 tests/test_deprecations.py::TestAlen::test_alen
 tests/test_deprecations.py::TestArrayDataAttributeAssignmentDeprecation::test_data_attr_assignment
 tests/test_deprecations.py::TestBinaryReprInsufficientWidthParameterForRepresentation::test_insufficient_width_negative
diff --git a/utils/command_build_clib.py b/utils/command_build_clib.py
index 95887cc65aaa..d16bab3aec4a 100644
--- a/utils/command_build_clib.py
+++ b/utils/command_build_clib.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -63,7 +63,7 @@
 # default variables (for Linux)
 _project_compiler = "icpx"
 _project_linker = "icpx"
-_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func"]
+_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"]
 _project_cmplr_flag_sycl = ["-fsycl"]
 _project_cmplr_flag_stdcpp_static = []  # This brakes TBB ["-static-libstdc++", "-static-libgcc"]
 _project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"]

From 93e92fcc663a6c18076c2ee51e184769a8dd66d5 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Sat, 18 Feb 2023 15:16:29 +0100
Subject: [PATCH 4/7] Intel LLVM is to use conda's gcc toolchain, sysroot and
 target libraries (#1306)


From 759c6e79bb3ee5379ba277b4245bd2c6aa739d07 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 7 Mar 2023 15:24:43 +0100
Subject: [PATCH 5/7] Merge master into gold/2021 (#1335)

* Add dpnp.broadcast_to() function (#1333)

* Add support of bool type in bitwise operations (#1334)

* Add support of bool type in bitwise operations

* Update dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
---
 .../include/dpnp_gen_2arg_1type_tbl.hpp       |   4 +-
 dpnp/backend/kernels/dpnp_krnl_bitwise.cpp    | 110 ++++++++--
 dpnp/dpnp_algo/dpnp_algo_bitwise.pyx          |   6 +-
 dpnp/dpnp_array.py                            |  69 ++++--
 dpnp/dpnp_iface.py                            |   3 +-
 dpnp/dpnp_iface_bitwise.py                    | 196 +++++++++++-------
 dpnp/dpnp_iface_logic.py                      |  30 ++-
 dpnp/dpnp_iface_manipulation.py               |  44 ++++
 dpnp/dpnp_iface_mathematical.py               |  16 +-
 tests/helper.py                               |   8 +-
 tests/skipped_tests.tbl                       |  14 +-
 tests/skipped_tests_gpu.tbl                   |  14 +-
 tests/test_arraymanipulation.py               | 173 ++++++++++++----
 tests/test_bitwise.py                         |  87 +++++---
 tests/test_sycl_queue.py                      |  12 +-
 tests/test_usm_type.py                        |  26 +++
 .../cupy/binary_tests/test_elementwise.py     |   5 +-
 17 files changed, 602 insertions(+), 215 deletions(-)

diff --git a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
index 19589c4b0eeb..0330faeee370 100644
--- a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -104,7 +104,7 @@
 
 #endif
 
-MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem& input2_elem)
+MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem & input2_elem)
 MACRO_2ARG_1TYPE_OP(dpnp_bitwise_or_c, input1_elem | input2_elem)
 MACRO_2ARG_1TYPE_OP(dpnp_bitwise_xor_c, input1_elem ^ input2_elem)
 MACRO_2ARG_1TYPE_OP(dpnp_left_shift_c, input1_elem << input2_elem)
diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
index c082bd636bf9..f3d8a4a95ccd 100644
--- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
@@ -27,6 +27,7 @@
 
 #include "dpnp_fptr.hpp"
 #include "dpnp_iface.hpp"
+#include "dpnp_iterator.hpp"
 #include "dpnp_utils.hpp"
 #include "dpnpc_memory_adapter.hpp"
 #include "queue_sycl.hpp"
@@ -49,27 +50,66 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
     sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
     sycl::event event;
 
-    DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size);
-    _DataType* array1 = input1_ptr.get_ptr();
-    _DataType* result = reinterpret_cast<_DataType*>(result1);
+    _DataType* input_data = static_cast<_DataType*>(array1_in);
+    _DataType* result = static_cast<_DataType*>(result1);
 
-    sycl::range<1> gws(size);
-    auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-        size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/
+    constexpr size_t lws = 64;
+    constexpr unsigned int vec_sz = 8;
+
+    auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
+    auto lws_range = sycl::range<1>(lws);
+
+    auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
+        auto sg = nd_it.get_sub_group();
+        const auto max_sg_size = sg.get_max_local_range()[0];
+        const size_t start =
+            vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size);
+
+        if (start + static_cast<size_t>(vec_sz) * max_sg_size < size)
         {
-            _DataType input_elem1 = array1[i];
-            result[i] = ~input_elem1;
+            using multi_ptrT = sycl::multi_ptr<_DataType, sycl::access::address_space::global_space>;
+
+            sycl::vec<_DataType, vec_sz> x = sg.load<vec_sz>(multi_ptrT(&input_data[start]));
+            sycl::vec<_DataType, vec_sz> res_vec;
+
+            if constexpr (std::is_same_v<_DataType, bool>)
+            {
+#pragma unroll
+                for (size_t k = 0; k < vec_sz; ++k)
+                {
+                    res_vec[k] = !(x[k]);
+                }
+            }
+            else
+            {
+                res_vec = ~x;
+            }
+
+            sg.store<vec_sz>(multi_ptrT(&result[start]), res_vec);
+        }
+        else
+        {
+            for (size_t k = start + sg.get_local_id()[0]; k < size; k += max_sg_size)
+            {
+                if constexpr (std::is_same_v<_DataType, bool>)
+                {
+                    result[k] = !(input_data[k]);
+                }
+                else
+                {
+                    result[k] = ~(input_data[k]);
+                }
+            }
         }
     };
 
     auto kernel_func = [&](sycl::handler& cgh) {
-        cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(gws, kernel_parallel_for_func);
+        cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(sycl::nd_range<1>(gws_range, lws_range),
+                                                                kernel_parallel_for_func);
     };
-
     event = q.submit(kernel_func);
 
     event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-
     return DPCTLEvent_Copy(event_ref);
 }
 
@@ -84,6 +124,7 @@ void dpnp_invert_c(void* array1_in, void* result1, size_t size)
                                                            size,
                                                            dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType>
@@ -98,9 +139,11 @@ DPCTLSyclEventRef (*dpnp_invert_ext_c)(DPCTLSyclQueueRef,
 
 static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
 {
+    fmap[DPNPFuncName::DPNP_FN_INVERT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_default_c<bool>};
     fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_ext_c<bool>};
     fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c<int64_t>};
 
@@ -114,6 +157,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
     template <typename _KernelNameSpecialization>                                                                      \
     class __name__##_strides_kernel;                                                                                   \
                                                                                                                        \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_broadcast_kernel;                                                                                 \
+                                                                                                                       \
     template <typename _DataType>                                                                                      \
     DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                \
                                void* result_out,                                                                       \
@@ -152,6 +198,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
         _DataType* input2_data = static_cast<_DataType*>(const_cast<void*>(input2_in));                                \
         _DataType* result = static_cast<_DataType*>(result_out);                                                       \
                                                                                                                        \
+        bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);                    \
+                                                                                                                       \
         shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
         get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
@@ -167,7 +215,42 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
                                                                                                                        \
-        if (use_strides)                                                                                               \
+        if (use_broadcasting)                                                                                          \
+        {                                                                                                              \
+            DPNPC_id<_DataType>* input1_it;                                                                            \
+            const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType>);                                        \
+            input1_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));   \
+            new (input1_it) DPNPC_id<_DataType>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim);        \
+                                                                                                                       \
+            input1_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            DPNPC_id<_DataType>* input2_it;                                                                            \
+            const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType>);                                        \
+            input2_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));   \
+            new (input2_it) DPNPC_id<_DataType>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim);        \
+                                                                                                                       \
+            input2_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                          \
+                {                                                                                                      \
+                    const _DataType input1_elem = (*input1_it)[i];                                                     \
+                    const _DataType input2_elem = (*input2_it)[i];                                                     \
+                    result[i] = __operation__;                                                                         \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<class __name__##_broadcast_kernel<_DataType>>(gws, kernel_parallel_for_func);         \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            input1_it->~DPNPC_id();                                                                                    \
+            input2_it->~DPNPC_id();                                                                                    \
+                                                                                                                       \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else if (use_strides)                                                                                          \
         {                                                                                                              \
             if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
             {                                                                                                          \
@@ -332,18 +415,21 @@ static void func_map_init_bitwise_2arg_1type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_default<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_and_c_ext<bool>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_ext<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_ext<int64_t>};
 
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_default<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_or_c_ext<bool>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_ext<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_ext<int64_t>};
 
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_default<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_xor_c_ext<bool>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_ext<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_ext<int64_t>};
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
index 482f00c2c71d..a8af53b709d1 100644
--- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -68,8 +68,8 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj,
     return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
 
 
-cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr):
-    return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape)
+cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr, utils.dpnp_descriptor out=None):
+    return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape, out=out, func_name="invert")
 
 
 cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj,
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index d1ad1252d4ec..f2ccf56ef76b 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -125,7 +125,9 @@ def __abs__(self):
     def __add__(self, other):
         return dpnp.add(self, other)
 
- # '__and__',
+    def __and__(self, other):
+        return dpnp.bitwise_and(self, other)
+
  # '__array__',
  # '__array_finalize__',
  # '__array_function__',
@@ -193,9 +195,17 @@ def __gt__(self, other):
 
  # '__hash__',
  # '__iadd__',
- # '__iand__',
+
+    def __iand__(self, other):
+        dpnp.bitwise_and(self, other, out=self)
+        return self
+
  # '__ifloordiv__',
- # '__ilshift__',
+
+    def __ilshift__(self, other):
+        dpnp.left_shift(self, other, out=self)
+        return self
+
  # '__imatmul__',
  # '__imod__',
  # '__imul__',
@@ -209,18 +219,28 @@ def __index__(self):
     def __int__(self):
         return self._array_obj.__int__()
 
- # '__invert__',
- # '__ior__',
+    def __invert__(self):
+        return dpnp.invert(self)
+
+    def __ior__(self, other):
+        dpnp.bitwise_or(self, other, out=self)
+        return self
 
     def __ipow__(self, other):
         dpnp.power(self, other, out=self)
         return self
 
- # '__irshift__',
+    def __irshift__(self, other):
+        dpnp.right_shift(self, other, out=self)
+        return self
+
  # '__isub__',
  # '__iter__',
  # '__itruediv__',
- # '__ixor__',
+
+    def __ixor__(self, other):
+        dpnp.bitwise_xor(self, other, out=self)
+        return self
 
     def __le__(self, other):
         return dpnp.less_equal(self, other)
@@ -232,7 +252,8 @@ def __len__(self):
 
         return self._array_obj.__len__()
 
- # '__lshift__',
+    def __lshift__(self, other):
+        return dpnp.left_shift(self, other)
 
     def __lt__(self, other):
         return dpnp.less(self, other)
@@ -253,7 +274,10 @@ def __neg__(self):
         return dpnp.negative(self)
 
  # '__new__',
- # '__or__',
+
+    def __or__(self, other):
+        return dpnp.bitwise_or(self, other)
+
  # '__pos__',
 
     def __pow__(self, other):
@@ -262,7 +286,9 @@ def __pow__(self, other):
     def __radd__(self, other):
         return dpnp.add(other, self)
 
- # '__rand__',
+    def __rand__(self, other):
+        return dpnp.bitwise_and(other, self)
+
  # '__rdivmod__',
  # '__reduce__',
  # '__reduce_ex__',
@@ -271,7 +297,9 @@ def __repr__(self):
         return dpt.usm_ndarray_repr(self._array_obj, prefix="array")
 
  # '__rfloordiv__',
- # '__rlshift__',
+
+    def __rlshift__(self, other):
+        return dpnp.left_shift(other, self)
 
     def __rmatmul__(self, other):
         return dpnp.matmul(other, self)
@@ -282,13 +310,17 @@ def __rmod__(self, other):
     def __rmul__(self, other):
         return dpnp.multiply(other, self)
 
- # '__ror__',
- 
+    def __ror__(self, other):
+        return dpnp.bitwise_or(other, self)
+
     def __rpow__(self, other):
         return dpnp.power(other, self)
 
- # '__rrshift__',
- # '__rshift__',
+    def __rrshift__(self, other):
+        return dpnp.right_shift(other, self)
+
+    def __rshift__(self, other):
+        return dpnp.right_shift(self, other)
 
     def __rsub__(self, other):
         return dpnp.subtract(other, self)
@@ -296,7 +328,9 @@ def __rsub__(self, other):
     def __rtruediv__(self, other):
         return dpnp.true_divide(other, self)
 
- # '__rxor__',
+    def __rxor__(self, other):
+        return dpnp.bitwise_xor(other, self)
+
  # '__setattr__',
 
     def __setitem__(self, key, val):
@@ -334,7 +368,8 @@ def __sub__(self, other):
     def __truediv__(self, other):
         return dpnp.true_divide(self, other)
 
- # '__xor__',
+    def __xor__(self, other):
+        return dpnp.bitwise_xor(self, other)
 
     @staticmethod
     def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray):
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index b7cdef8cc615..9bf456060ddd 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -251,6 +251,7 @@ def from_dlpack(obj, /):
 def get_dpnp_descriptor(ext_obj,
                         copy_when_strides=True,
                         copy_when_nondefault_queue=True,
+                        alloc_dtype=None,
                         alloc_usm_type=None,
                         alloc_queue=None):
     """
@@ -274,7 +275,7 @@ def get_dpnp_descriptor(ext_obj,
     # If input object is a scalar, it means it was allocated on host memory.
     # We need to copy it to USM memory according to compute follows data paradigm.
     if isscalar(ext_obj):
-        ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
+        ext_obj = array(ext_obj, dtype=alloc_dtype, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
 
     # while dpnp functions have no implementation with strides support
     # we need to create a non-strided copy
diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index 51a28b0464ea..36f37f4282ec 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -40,19 +40,20 @@
 """
 
 
-import numpy
-
-
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 import dpnp
 
+import numpy
+import dpctl.tensor as dpt
+
+
 __all__ = [
     'bitwise_and',
+    'bitwise_not',
     'bitwise_or',
     'bitwise_xor',
     'invert',
-    'bitwise_not',
     'left_shift',
     'right_shift',
 ]
@@ -61,37 +62,34 @@
 def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs):
     """Choose function to call based on input and call chosen fucntion."""
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
-
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif x1_desc and x2_desc and x1_desc.size != x2_desc.size:
-            pass
-        elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
+    if where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        if dpnp.isscalar(x1) or dpnp.isscalar(x2):
+            usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+            dtype = x1.dtype if not dpnp.isscalar(x1) else x2.dtype
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_func(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
+            dtype, usm_type, queue = (None, None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_dtype=dtype, alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_dtype=dtype, alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+
+            return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
 
     return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
 
@@ -102,14 +100,20 @@ def bitwise_and(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.bitwise_and`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input data is supported as integer only.
+    Data types of input arrays `x1` and `x2` are limited to :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -136,14 +140,20 @@ def bitwise_or(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.bitwise_or`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input data is supported as integer only.
+    Data types of input arrays `x1` and `x2` are limited to :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -170,14 +180,20 @@ def bitwise_xor(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.bitwise_xor`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input data is supported as integer only.
+    Data types of input arrays `x1` and `x2` are limited to :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -198,18 +214,33 @@ def bitwise_xor(x1, x2, dtype=None, out=None, where=True, **kwargs):
     return _check_nd_call(numpy.bitwise_xor, dpnp_bitwise_xor, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
 
 
-def invert(x, **kwargs):
+def invert(x,
+           /,
+           out=None,
+           *,
+           where=True,
+           dtype=None,
+           subok=True,
+           **kwargs):
     """
     Compute bit-wise inversion, or bit-wise NOT, element-wise.
 
     For full documentation refer to :obj:`numpy.invert`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x`` is supported as :obj:`dpnp.ndarray`.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameter `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input array ``x`` is supported as integer :obj:`dpnp.ndarray` only.
+    Data type of input array `x` is limited by :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -220,19 +251,34 @@ def invert(x, **kwargs):
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> x = np.array([13])
-    >>> out = np.invert(x)
+    >>> import dpnp as dp
+    >>> x = dp.array([13])
+    >>> out = dp.invert(x)
     >>> out[0]
     -14
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False)
-    if x1_desc and not kwargs:
-        return dpnp_invert(x1_desc).get_pyobj()
-
-    return call_origin(numpy.invert, x, **kwargs)
+    if kwargs:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    else:
+        x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False)
+        if x1_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+            return dpnp_invert(x1_desc, out_desc).get_pyobj()  # out_desc is bound only in this branch
+
+    return call_origin(numpy.invert, x, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 bitwise_not = invert  # bitwise_not is an alias for invert
@@ -244,12 +290,17 @@ def left_shift(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.left_shift`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
     Input data is supported as integer only.
 
@@ -276,12 +327,17 @@ def right_shift(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.right_shift`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
     Input data is supported as integer only.
 
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 716b2ff8a0f2..e36c44d3f989 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -300,7 +300,8 @@ def equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.equal, x1, x2)
+
+    return call_origin(numpy.equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def greater(x1,
@@ -370,7 +371,8 @@ def greater(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_greater(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.greater, x1, x2)
+
+    return call_origin(numpy.greater, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def greater_equal(x1,
@@ -440,7 +442,8 @@ def greater_equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.greater_equal, x1, x2)
+
+    return call_origin(numpy.greater_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False):
@@ -685,7 +688,8 @@ def less(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_less(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.less, x1, x2)
+
+    return call_origin(numpy.less, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def less_equal(x1,
@@ -755,7 +759,8 @@ def less_equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_less_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.less_equal, x1, x2)
+
+    return call_origin(numpy.less_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_and(x1,
@@ -824,7 +829,8 @@ def logical_and(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_logical_and(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.logical_and, x1, x2)
+
+    return call_origin(numpy.logical_and, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_not(x,
@@ -881,7 +887,8 @@ def logical_not(x,
         x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False)
         if x1_desc:
             return dpnp_logical_not(x1_desc).get_pyobj()
-    return call_origin(numpy.logical_not, x)
+
+    return call_origin(numpy.logical_not, x, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_or(x1,
@@ -950,7 +957,8 @@ def logical_or(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_logical_or(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.logical_or, x1, x2)
+
+    return call_origin(numpy.logical_or, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_xor(x1,
@@ -1019,7 +1027,8 @@ def logical_xor(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.logical_xor, x1, x2)
+
+    return call_origin(numpy.logical_xor, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def not_equal(x1,
@@ -1089,4 +1098,5 @@ def not_equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_not_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.not_equal, x1, x2)
+
+    return call_origin(numpy.not_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index adc2bdf15f31..567661bdb57f 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -47,7 +47,10 @@
 from dpnp.dpnp_iface_arraycreation import array
 
 import dpnp
+from dpnp.dpnp_array import dpnp_array
+
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -55,6 +58,7 @@
     "atleast_1d",
     "atleast_2d",
     "atleast_3d",
+    "broadcast_to",
     "concatenate",
     "copyto",
     "expand_dims",
@@ -190,6 +194,46 @@ def atleast_3d(*arys):
     return call_origin(numpy.atleast_3d, *arys)
 
 
+def broadcast_to(x, /, shape, subok=False):
+    """
+    Broadcast an array to a new shape.
+
+    For full documentation refer to :obj:`numpy.broadcast_to`.
+
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array having the specified shape and the same data type as `x`.
+
+    Limitations
+    -----------
+    Parameter `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
+    Parameter `subok` is supported with its default value only.
+    Otherwise the function will be executed sequentially on CPU.
+    The data type of input array `x` is limited to the supported DPNP :ref:`Data types`.
+
+    Examples
+    --------
+    >>> import dpnp as dp
+    >>> x = dp.array([1, 2, 3])
+    >>> dp.broadcast_to(x, (3, 3))
+    array([[1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3]])
+
+    """
+
+    if subok is not False:
+        pass  # non-default `subok`: fall through to the NumPy call below
+    elif isinstance(x, (dpnp_array, dpt.usm_ndarray)):
+        dpt_array = x.get_array() if isinstance(x, dpnp_array) else x
+        new_array = dpt.broadcast_to(dpt_array, shape)
+        return dpnp_array._create_from_usm_ndarray(new_array)
+
+    return call_origin(numpy.broadcast_to, x, shape=shape, subok=subok)
+
+
 def concatenate(arrs, axis=0, out=None, dtype=None, casting="same_kind"):
     """
     Join a sequence of arrays along an existing axis.
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 03d2a3527750..08de8b2ba5a8 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -44,6 +44,7 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
+
 import numpy
 import dpctl.tensor as dpt
 
@@ -1413,15 +1414,14 @@ def power(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
-
-        if out is not None:
-            if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
-                raise TypeError("return array must be of supported array type")
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
-        else:
-            out_desc = None
-
         if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+
             return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
 
     return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
diff --git a/tests/helper.py b/tests/helper.py
index 8432443d488f..1e97615fb3de 100644
--- a/tests/helper.py
+++ b/tests/helper.py
@@ -27,7 +27,9 @@ def get_float_dtypes(no_float16=True,
     dev = dpctl.select_default_device() if device is None else device
 
     # add floating types
-    dtypes = [dpnp.float16] if not no_float16 else []
+    dtypes = []
+    if not no_float16 and dev.has_aspect_fp16:
+        dtypes.append(dpnp.float16)
 
     dtypes.append(dpnp.float32)
     if dev.has_aspect_fp64:
@@ -64,11 +66,11 @@ def get_all_dtypes(no_bool=False,
     dtypes.extend([dpnp.int32, dpnp.int64])
 
     # add floating types
-    dtypes.extend(get_float_dtypes(dev))
+    dtypes.extend(get_float_dtypes(no_float16=no_float16, device=dev))
 
     # add complex types
     if not no_complex:
-        dtypes.extend(get_complex_dtypes(dev))
+        dtypes.extend(get_complex_dtypes(device=dev))
 
     # add None value to validate a default dtype
     if not no_none:
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 26dd6fc59cd7..ecc5bd5e999a 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -192,12 +192,7 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)]
 
 tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1]
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_and
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_or
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_xor
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_invert
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_left_shift
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_right_shift
+
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace
@@ -648,12 +643,7 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{shapes=[(2, 0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape_numpy19
+
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 7e9b9e5505de..d3864a05b7e6 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -359,12 +359,7 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)]
 tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1]
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_and
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_or
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_xor
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_invert
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_left_shift
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_right_shift
+
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace
@@ -853,12 +848,7 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{shapes=[(2, 0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape_numpy19
+
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1
diff --git a/tests/test_arraymanipulation.py b/tests/test_arraymanipulation.py
index f22e8175c3b2..6a2b452917b0 100644
--- a/tests/test_arraymanipulation.py
+++ b/tests/test_arraymanipulation.py
@@ -2,7 +2,16 @@
 from .helper import get_all_dtypes
 
 import dpnp
+
 import numpy
+from numpy.testing import (
+    assert_,
+    assert_allclose,
+    assert_array_equal,
+    assert_equal,
+    assert_raises,
+    assert_warns
+)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -14,7 +23,7 @@ def test_asfarray(dtype, data):
     expected = numpy.asfarray(data, dtype)
     result = dpnp.asfarray(data, dtype)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("dtype", get_all_dtypes())
@@ -24,7 +33,99 @@ def test_asfarray2(dtype, data, data_dtype):
     expected = numpy.asfarray(numpy.array(data, dtype=data_dtype), dtype)
     result = dpnp.asfarray(dpnp.array(data, dtype=data_dtype), dtype)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
+
+
+class TestDims:
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize("sh",
+                             [(0,), (1,), (3,)],
+                             ids=['(0,)', '(1,)', '(3,)'])
+    def test_broadcast_array(self, sh, dt):
+        np_a = numpy.array(0, dtype=dt)
+        dp_a = dpnp.array(0, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize("sh",
+                             [(1,), (2,), (1, 2, 3)],
+                             ids=['(1,)', '(2,)', '(1, 2, 3)'])
+    def test_broadcast_ones(self, sh, dt):
+        np_a = numpy.ones(1, dtype=dt)
+        dp_a = dpnp.ones(1, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize("sh",
+                             [(3,), (1, 3), (2, 3)],
+                             ids=['(3,)', '(1, 3)', '(2, 3)'])
+    def test_broadcast_arange(self, sh, dt):
+        np_a = numpy.arange(3, dtype=dt)
+        dp_a = dpnp.arange(3, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            pytest.param([0], [0], id="(0)"),
+            pytest.param([1], [1], id="(1)"),
+            pytest.param([1], [2], id="(2)"),
+        ],
+    )
+    def test_broadcast_not_tuple(self, sh1, sh2, dt):
+        np_a = numpy.ones(sh1, dtype=dt)
+        dp_a = dpnp.ones(sh1, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh2)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            pytest.param([1], (0,), id="(0,)"),
+            pytest.param((1, 2), (0, 2), id="(0, 2)"),
+            pytest.param((2, 1), (2, 0), id="(2, 0)"),
+        ],
+    )
+    def test_broadcast_zero_shape(self, sh1, sh2, dt):
+        np_a = numpy.ones(sh1, dtype=dt)
+        dp_a = dpnp.ones(sh1, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh2)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            pytest.param((0,), (), id="(0,)-()"),
+            pytest.param((1,), (), id="(1,)-()"),
+            pytest.param((3,), (), id="(3,)-()"),
+            pytest.param((3,), (1,), id="(3,)-(1,)"),
+            pytest.param((3,), (2,), id="(3,)-(2,)"),
+            pytest.param((3,), (4,), id="(3,)-(4,)"),
+            pytest.param((1, 2), (2, 1), id="(1, 2)-(2, 1)"),
+            pytest.param((1, 2), (1,), id="(1, 2)-(1,)"),
+            pytest.param((1,), -1, id="(1,)--1"),
+            pytest.param((1,), (-1,), id="(1,)-(-1,)"),
+            pytest.param((1, 2), (-1, 2), id="(1, 2)-(-1, 2)"),
+        ],
+    )
+    def test_broadcast_raise(self, sh1, sh2):
+        np_a = numpy.zeros(sh1)
+        dp_a = dpnp.zeros(sh1)
+        # one `raises` context per library: in a shared block the first error skips the dpnp call
+        with pytest.raises(ValueError):
+            numpy.broadcast_to(np_a, sh2)
+        with pytest.raises(ValueError):
+            dpnp.broadcast_to(dp_a, sh2)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -38,62 +139,62 @@ def test_returns_copy(self):
     def test_large_concatenate_axis_None(self):
         x = dpnp.arange(1, 100)
         r = dpnp.concatenate(x, None)
-        numpy.testing.assert_array_equal(x, r)
+        assert_array_equal(x, r)
         r = dpnp.concatenate(x, 100)
-        numpy.testing.assert_array_equal(x, r)
+        assert_array_equal(x, r)
 
     def test_concatenate(self):
         # Test concatenate function
         # One sequence returns unmodified (but as array)
         r4 = list(range(4))
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4,)), r4)
+        assert_array_equal(dpnp.concatenate((r4,)), r4)
         # Any sequence
-        numpy.testing.assert_array_equal(dpnp.concatenate((tuple(r4),)), r4)
-        numpy.testing.assert_array_equal(dpnp.concatenate((dpnp.array(r4),)), r4)
+        assert_array_equal(dpnp.concatenate((tuple(r4),)), r4)
+        assert_array_equal(dpnp.concatenate((dpnp.array(r4),)), r4)
         # 1D default concatenation
         r3 = list(range(3))
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3)), r4 + r3)
+        assert_array_equal(dpnp.concatenate((r4, r3)), r4 + r3)
         # Mixed sequence types
-        numpy.testing.assert_array_equal(dpnp.concatenate((tuple(r4), r3)), r4 + r3)
-        numpy.testing.assert_array_equal(
+        assert_array_equal(dpnp.concatenate((tuple(r4), r3)), r4 + r3)
+        assert_array_equal(
             dpnp.concatenate((dpnp.array(r4), r3)), r4 + r3
         )
         # Explicit axis specification
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3), 0), r4 + r3)
+        assert_array_equal(dpnp.concatenate((r4, r3), 0), r4 + r3)
         # Including negative
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3), -1), r4 + r3)
+        assert_array_equal(dpnp.concatenate((r4, r3), -1), r4 + r3)
         # 2D
         a23 = dpnp.array([[10, 11, 12], [13, 14, 15]])
         a13 = dpnp.array([[0, 1, 2]])
         res = dpnp.array([[10, 11, 12], [13, 14, 15], [0, 1, 2]])
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23, a13)), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23, a13), 0), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23.T, a13.T), 1), res.T)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23.T, a13.T), -1), res.T)
+        assert_array_equal(dpnp.concatenate((a23, a13)), res)
+        assert_array_equal(dpnp.concatenate((a23, a13), 0), res)
+        assert_array_equal(dpnp.concatenate((a23.T, a13.T), 1), res.T)
+        assert_array_equal(dpnp.concatenate((a23.T, a13.T), -1), res.T)
         # Arrays much match shape
-        numpy.testing.assert_raises(ValueError, dpnp.concatenate, (a23.T, a13.T), 0)
+        assert_raises(ValueError, dpnp.concatenate, (a23.T, a13.T), 0)
         # 3D
         res = dpnp.reshape(dpnp.arange(2 * 3 * 7), (2, 3, 7))
         a0 = res[..., :4]
         a1 = res[..., 4:6]
         a2 = res[..., 6:]
-        numpy.testing.assert_array_equal(dpnp.concatenate((a0, a1, a2), 2), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a0, a1, a2), -1), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a0.T, a1.T, a2.T), 0), res.T)
+        assert_array_equal(dpnp.concatenate((a0, a1, a2), 2), res)
+        assert_array_equal(dpnp.concatenate((a0, a1, a2), -1), res)
+        assert_array_equal(dpnp.concatenate((a0.T, a1.T, a2.T), 0), res.T)
 
         out = dpnp.copy(res)
         rout = dpnp.concatenate((a0, a1, a2), 2, out=out)
-        numpy.testing.assert_(out is rout)
-        numpy.testing.assert_equal(res, rout)
+        assert_(out is rout)
+        assert_equal(res, rout)
 
 
 class TestHstack:
     def test_non_iterable(self):
-        numpy.testing.assert_raises(TypeError, dpnp.hstack, 1)
+        assert_raises(TypeError, dpnp.hstack, 1)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_empty_input(self):
-        numpy.testing.assert_raises(ValueError, dpnp.hstack, ())
+        assert_raises(ValueError, dpnp.hstack, ())
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_0D_array(self):
@@ -101,7 +202,7 @@ def test_0D_array(self):
         a = dpnp.array(1)
         res = dpnp.hstack([a, b])
         desired = dpnp.array([1, 2])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_1D_array(self):
@@ -109,7 +210,7 @@ def test_1D_array(self):
         b = dpnp.array([2])
         res = dpnp.hstack([a, b])
         desired = dpnp.array([1, 2])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array(self):
@@ -117,22 +218,22 @@ def test_2D_array(self):
         b = dpnp.array([[1], [2]])
         res = dpnp.hstack([a, b])
         desired = dpnp.array([[1, 1], [2, 2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     def test_generator(self):
-        with numpy.testing.assert_warns(FutureWarning):
+        with assert_warns(FutureWarning):
             dpnp.hstack((numpy.arange(3) for _ in range(2)))
-        with numpy.testing.assert_warns(FutureWarning):
+        with assert_warns(FutureWarning):
             dpnp.hstack(map(lambda x: x, numpy.ones((3, 2))))
 
 
 class TestVstack:
     def test_non_iterable(self):
-        numpy.testing.assert_raises(TypeError, dpnp.vstack, 1)
+        assert_raises(TypeError, dpnp.vstack, 1)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_empty_input(self):
-        numpy.testing.assert_raises(ValueError, dpnp.vstack, ())
+        assert_raises(ValueError, dpnp.vstack, ())
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_0D_array(self):
@@ -140,7 +241,7 @@ def test_0D_array(self):
         b = dpnp.array(2)
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1], [2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_1D_array(self):
@@ -148,7 +249,7 @@ def test_1D_array(self):
         b = dpnp.array([2])
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1], [2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array(self):
@@ -156,7 +257,7 @@ def test_2D_array(self):
         b = dpnp.array([[1], [2]])
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1], [2], [1], [2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array2(self):
@@ -164,8 +265,8 @@ def test_2D_array2(self):
         b = dpnp.array([1, 2])
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1, 2], [1, 2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     def test_generator(self):
-        with numpy.testing.assert_warns(FutureWarning):
+        with assert_warns(FutureWarning):
             dpnp.vstack((numpy.arange(3) for _ in range(2)))
diff --git a/tests/test_bitwise.py b/tests/test_bitwise.py
index 645ae4556c1c..34f7f971c86b 100644
--- a/tests/test_bitwise.py
+++ b/tests/test_bitwise.py
@@ -3,60 +3,97 @@
 import dpnp as inp
 
 import numpy
+from numpy.testing import (
+    assert_array_equal
+)
 
 
 @pytest.mark.parametrize("lhs", [[[-7, -6, -5, -4, -3, -2, -1], [0, 1, 2, 3, 4, 5, 6]], [-3, -2, -1, 0, 1, 2, 3], 0])
 @pytest.mark.parametrize("rhs", [[[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13]], [0, 1, 2, 3, 4, 5, 6], 3])
-@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64])
+@pytest.mark.parametrize("dtype", [inp.bool, inp.int32, inp.int64])
 class TestBitwise:
 
     @staticmethod
     def array_or_scalar(xp, data, dtype=None):
         if numpy.isscalar(data):
+            if dtype == inp.bool:
+                return numpy.dtype(dtype).type(data)
             return data
 
         return xp.array(data, dtype=dtype)
 
     def _test_unary_int(self, name, data, dtype):
-        a = self.array_or_scalar(inp, data, dtype=dtype)
-        result = getattr(inp, name)(a)
+        dp_a = self.array_or_scalar(inp, data, dtype=dtype)
+        result = getattr(inp, name)(dp_a)
 
-        a = self.array_or_scalar(numpy, data, dtype=dtype)
-        expected = getattr(numpy, name)(a)
+        np_a = self.array_or_scalar(numpy, data, dtype=dtype)
+        expected = getattr(numpy, name)(np_a)
 
-        numpy.testing.assert_array_equal(result, expected)
+        assert_array_equal(result, expected)
+        return (dp_a, np_a)
 
     def _test_binary_int(self, name, lhs, rhs, dtype):
-        a = self.array_or_scalar(inp, lhs, dtype=dtype)
-        b = self.array_or_scalar(inp, rhs, dtype=dtype)
-        result = getattr(inp, name)(a, b)
+        if name in ('left_shift', 'right_shift') and dtype == inp.bool:
+            pytest.skip("A shift operation isn't implemented for bool type")
+        elif numpy.isscalar(lhs) and numpy.isscalar(rhs):
+            pytest.skip("Both inputs can't be scalars")
 
-        a = self.array_or_scalar(numpy, lhs, dtype=dtype)
-        b = self.array_or_scalar(numpy, rhs, dtype=dtype)
-        expected = getattr(numpy, name)(a, b)
+        dp_a = self.array_or_scalar(inp, lhs, dtype=dtype)
+        dp_b = self.array_or_scalar(inp, rhs, dtype=dtype)
+        result = getattr(inp, name)(dp_a, dp_b)
 
-        numpy.testing.assert_array_equal(result, expected)
+        np_a = self.array_or_scalar(numpy, lhs, dtype=dtype)
+        np_b = self.array_or_scalar(numpy, rhs, dtype=dtype)
+        expected = getattr(numpy, name)(np_a, np_b)
+
+        assert_array_equal(result, expected)
+        return (dp_a, dp_b, np_a, np_b)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_and(self, lhs, rhs, dtype):
-        self._test_binary_int('bitwise_and', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_and', lhs, rhs, dtype)
+        assert_array_equal(dp_a & dp_b, np_a & np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a &= dp_b
+            np_a &= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_or(self, lhs, rhs, dtype):
-        self._test_binary_int('bitwise_or', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_or', lhs, rhs, dtype)
+        assert_array_equal(dp_a | dp_b, np_a | np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a |= dp_b
+            np_a |= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_xor(self, lhs, rhs, dtype):
-        self._test_binary_int('bitwise_xor', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_xor', lhs, rhs, dtype)
+        assert_array_equal(dp_a ^ dp_b, np_a ^ np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a ^= dp_b
+            np_a ^= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invert(self, lhs, rhs, dtype):
-        self._test_unary_int('invert', lhs, dtype)
+        dp_a, np_a = self._test_unary_int('invert', lhs, dtype)
+        assert_array_equal(~dp_a, ~np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_left_shift(self, lhs, rhs, dtype):
-        self._test_binary_int('left_shift', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('left_shift', lhs, rhs, dtype)
+        assert_array_equal(dp_a << dp_b, np_a << np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a <<= dp_b
+            np_a <<= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_right_shift(self, lhs, rhs, dtype):
-        self._test_binary_int('right_shift', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('right_shift', lhs, rhs, dtype)
+        assert_array_equal(dp_a >> dp_b, np_a >> np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a >>= dp_b
+            np_a >>= np_b
+            assert_array_equal(dp_a, np_a)
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 42cbe7459513..a523c46465bf 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -927,8 +927,7 @@ def test_from_dlpack(arr_dtype, shape, device):
 @pytest.mark.parametrize("device",
                          valid_devices,
                          ids=[device.filter_string for device in valid_devices])
-#TODO need to delete no_bool=True when use dlpack > 0.7 version
-@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True))
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True))
 def test_from_dlpack_with_dpt(arr_dtype, device):
     X = dpctl.tensor.empty((64,), dtype=arr_dtype, device=device)
     Y = dpnp.from_dlpack(X)
@@ -937,3 +936,12 @@ def test_from_dlpack_with_dpt(arr_dtype, device):
     assert X.__dlpack_device__() == Y.__dlpack_device__()
     assert X.usm_type == Y.usm_type
     assert_sycl_queue_equal(X.sycl_queue, Y.sycl_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_broadcast_to(device):
+    x = dpnp.arange(5, device=device)
+    y = dpnp.broadcast_to(x, (3, 5))
+    assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 96d55f6875c5..817bdee66a57 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -146,6 +146,24 @@ def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize("op",
+                         ['bitwise_and', 'bitwise_or', 'bitwise_xor', 'left_shift', 'right_shift'],
+                         ids=['bitwise_and', 'bitwise_or', 'bitwise_xor', 'left_shift', 'right_shift'])
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y):
+    x = dp.arange(25, usm_type = usm_type_x)
+    y = dp.arange(25, usm_type = usm_type_y)[::-1]
+
+    z = getattr(dp, op)(x, y)
+    zx = getattr(dp, op)(x, 7)
+    zy = getattr(dp, op)(12, y)
+
+    assert x.usm_type == zx.usm_type == usm_type_x
+    assert y.usm_type == zy.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
 def test_meshgrid(usm_type_x, usm_type_y):
@@ -155,6 +173,7 @@ def test_meshgrid(usm_type_x, usm_type_y):
     assert z[0].usm_type == usm_type_x
     assert z[1].usm_type == usm_type_y
 
+
 @pytest.mark.parametrize(
     "func,data1,data2",
     [
@@ -173,3 +192,10 @@ def test_2in_1out(func, data1, data2, usm_type_x, usm_type_y):
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_broadcast_to(usm_type):
+    x = dp.ones(7, usm_type=usm_type)
+    y = dp.broadcast_to(x, (2, 7))
+    assert x.usm_type == y.usm_type
diff --git a/tests/third_party/cupy/binary_tests/test_elementwise.py b/tests/third_party/cupy/binary_tests/test_elementwise.py
index b2212e043f23..a01cbb082a37 100644
--- a/tests/third_party/cupy/binary_tests/test_elementwise.py
+++ b/tests/third_party/cupy/binary_tests/test_elementwise.py
@@ -1,18 +1,19 @@
 import unittest
 
+import numpy
 from tests.third_party.cupy import testing
 
 
 @testing.gpu
 class TestElementwise(unittest.TestCase):
 
-    @testing.for_int_dtypes()
+    @testing.for_dtypes((numpy.bool_, numpy.int32, numpy.int64))
     @testing.numpy_cupy_array_equal()
     def check_unary_int(self, name, xp, dtype):
         a = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=dtype)
         return getattr(xp, name)(a)
 
-    @testing.for_int_dtypes()
+    @testing.for_dtypes((numpy.int32, numpy.int64))
     @testing.numpy_cupy_array_equal()
     def check_binary_int(self, name, xp, dtype):
         a = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=dtype)

From f8ae52e4530b842baba4477d4be49d86a32380ef Mon Sep 17 00:00:00 2001
From: Evseniia Komarova <evseniia.komarova@intel.com>
Date: Wed, 3 May 2023 14:06:40 +0200
Subject: [PATCH 6/7] set numpy 1.21 (#1390)

---
 conda-recipe/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 0c6e38f667db..05f431541918 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -8,7 +8,7 @@ requirements:
     host:
       - python
       - setuptools
-      - numpy 1.19
+      - numpy 1.21
       - cython
       - cmake >=3.19
       - dpctl >=0.14

From b00215354d733f340f65d4eb7bc230620a3687f1 Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 8 May 2023 12:41:35 +0200
Subject: [PATCH 7/7] Update meta.yaml (#1392)

* Pin DPC++ 2023.1.0

* Require minimum version of sysroot >= 2.17
---
 conda-recipe/meta.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 05f431541918..cf9150fc3297 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -18,7 +18,8 @@ requirements:
       - wheel
     build:
       - {{ compiler('cxx') }}
-      - {{ compiler('dpcpp') }}  >=2023.0  # [not osx]
+      - {{ compiler('dpcpp') }}  =2023.1.0  # [not osx]
+      - sysroot_linux-64 >=2.17  # [linux]
     run:
       - python
       - dpctl >=0.14