diff --git a/numba_dppy/dppy_host_fn_call_gen.py b/numba_dppy/dppy_host_fn_call_gen.py
index 7d1c9bcea4..2808ddf90d 100644
--- a/numba_dppy/dppy_host_fn_call_gen.py
+++ b/numba_dppy/dppy_host_fn_call_gen.py
@@ -52,7 +52,7 @@ def _init_llvm_types_and_constants(self):
         self.byte_ptr_t = lc.Type.pointer(self.byte_t)
         self.byte_ptr_ptr_t = lc.Type.pointer(self.byte_ptr_t)
         self.intp_t = self.context.get_value_type(types.intp)
-        self.long_t = self.context.get_value_type(types.int64)
+        self.int64_t = self.context.get_value_type(types.int64)
         self.int32_t = self.context.get_value_type(types.int32)
         self.int32_ptr_t = lc.Type.pointer(self.int32_t)
         self.uintp_t = self.context.get_value_type(types.uintp)
@@ -113,23 +113,26 @@ def allocate_kenrel_arg_array(self, num_kernel_args):
 
     def resolve_and_return_dpctl_type(self, ty):
+        """Look up the DPCTLKernelArgType enum value dpctl defines for a Numba type.
+        """
+        val = None
         if ty == types.int32 or isinstance(ty, types.scalars.IntegerLiteral):
-            val = self.context.get_constant(types.int32, 4)
+            val = self.context.get_constant(types.int32, 9)  # DPCTL_LONG_LONG
         elif ty == types.uint32:
-            val = self.context.get_constant(types.int32, 5)
+            val = self.context.get_constant(types.int32, 10)  # DPCTL_UNSIGNED_LONG_LONG
         elif ty == types.boolean:
-            val = self.context.get_constant(types.int32, 5)
+            val = self.context.get_constant(types.int32, 5)  # DPCTL_UNSIGNED_INT
         elif ty == types.int64:
-            val = self.context.get_constant(types.int32, 7)
+            val = self.context.get_constant(types.int32, 9)  # DPCTL_LONG_LONG
         elif ty == types.uint64:
-            val = self.context.get_constant(types.int32, 8)
+            val = self.context.get_constant(types.int32, 11)  # DPCTL_SIZE_T
         elif ty == types.float32:
-            val = self.context.get_constant(types.int32, 12)
+            val = self.context.get_constant(types.int32, 12)  # DPCTL_FLOAT
         elif ty == types.float64:
-            val = self.context.get_constant(types.int32, 13)
+            val = self.context.get_constant(types.int32, 13)  # DPCTL_DOUBLE
         elif ty == types.voidptr:
-            val = self.context.get_constant(types.int32, 15)
+            val = self.context.get_constant(types.int32, 15)  # DPCTL_VOID_PTR
         else:
             raise NotImplementedError
@@ -151,12 +154,12 @@ def process_kernel_arg(self, var, llvm_arg, arg_type, gu_sig, val_type, index, m
             if llvm_arg is None:
                 raise NotImplementedError(arg_type, var)
 
-            storage = cgutils.alloca_once(self.builder, self.long_t)
+            storage = cgutils.alloca_once(self.builder, self.int64_t)
             self.builder.store(self.context.get_constant(types.int64, 0), storage)
             ty = self.resolve_and_return_dpctl_type(types.int64)
             self.form_kernel_arg_and_arg_ty(self.builder.bitcast(storage, self.void_ptr_t), ty)
 
-            storage = cgutils.alloca_once(self.builder, self.long_t)
+            storage = cgutils.alloca_once(self.builder, self.int64_t)
             self.builder.store(self.context.get_constant(types.int64, 0), storage)
             ty = self.resolve_and_return_dpctl_type(types.int64)
             self.form_kernel_arg_and_arg_ty(self.builder.bitcast(storage, self.void_ptr_t), ty)
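
Note: the corrected magic numbers above come from dpctl's DPCTLKernelArgType C enum. As a reading aid only, here is a hedged Python mirror of the values this patch relies on, inferred from the inline comments in the hunk above; dpctl's own C API headers remain the source of truth.

from enum import IntEnum

class DPCTLKernelArgType(IntEnum):
    # Values inferred from the comments in the patch above; verify
    # against dpctl's headers before relying on them elsewhere.
    DPCTL_UNSIGNED_INT = 5         # types.boolean
    DPCTL_LONG_LONG = 9            # types.int32, IntegerLiteral, types.int64
    DPCTL_UNSIGNED_LONG_LONG = 10  # types.uint32
    DPCTL_SIZE_T = 11              # types.uint64
    DPCTL_FLOAT = 12               # types.float32
    DPCTL_DOUBLE = 13              # types.float64
    DPCTL_VOID_PTR = 15            # types.voidptr
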
diff --git a/numba_dppy/examples/pa_examples/test1-2d.py b/numba_dppy/examples/pa_examples/test1-2d.py
index 7985216aba..df3849b30d 100644
--- a/numba_dppy/examples/pa_examples/test1-2d.py
+++ b/numba_dppy/examples/pa_examples/test1-2d.py
@@ -1,23 +1,29 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 1000
 print("N", N)
 
-a = np.ones((N,N), dtype=np.float32)
-b = np.ones((N,N), dtype=np.float32)
+a = np.ones((N, N), dtype=np.float32)
+b = np.ones((N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
-        if c[i,j] != 2.0:
+        if c[i, j] != 2.0:
             print("First index not equal to 2.0 was", i, j)
             break
diff --git a/numba_dppy/examples/pa_examples/test1-3d.py b/numba_dppy/examples/pa_examples/test1-3d.py
index 1304c0762a..a69aa0cbc5 100644
--- a/numba_dppy/examples/pa_examples/test1-3d.py
+++ b/numba_dppy/examples/pa_examples/test1-3d.py
@@ -1,24 +1,30 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 10
 print("N", N)
 
-a = np.ones((N,N,N), dtype=np.float32)
-b = np.ones((N,N,N), dtype=np.float32)
+a = np.ones((N, N, N), dtype=np.float32)
+b = np.ones((N, N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
         for k in range(N):
-            if c[i,j,k] != 2.0:
+            if c[i, j, k] != 2.0:
                 print("First index not equal to 2.0 was", i, j, k)
                 break
diff --git a/numba_dppy/examples/pa_examples/test1-4d.py b/numba_dppy/examples/pa_examples/test1-4d.py
index bb52da28de..2647d0e66e 100644
--- a/numba_dppy/examples/pa_examples/test1-4d.py
+++ b/numba_dppy/examples/pa_examples/test1-4d.py
@@ -1,25 +1,31 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 10
 print("N", N)
 
-a = np.ones((N,N,N,N), dtype=np.float32)
-b = np.ones((N,N,N,N), dtype=np.float32)
+a = np.ones((N, N, N, N), dtype=np.float32)
+b = np.ones((N, N, N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
         for k in range(N):
             for l in range(N):
-                if c[i,j,k,l] != 2.0:
+                if c[i, j, k, l] != 2.0:
                     print("First index not equal to 2.0 was", i, j, k, l)
                     break
diff --git a/numba_dppy/examples/pa_examples/test1-5d.py b/numba_dppy/examples/pa_examples/test1-5d.py
index e795dbe602..893fe3b6a6 100644
--- a/numba_dppy/examples/pa_examples/test1-5d.py
+++ b/numba_dppy/examples/pa_examples/test1-5d.py
@@ -1,26 +1,32 @@
 from numba import njit, gdb
 import numpy as np
+import dpctl
 
-@njit(parallel={'offload':True})
+
+@njit
 def f1(a, b):
     c = a + b
     return c
 
+
 N = 5
 print("N", N)
 
-a = np.ones((N,N,N,N,N), dtype=np.float32)
-b = np.ones((N,N,N,N,N), dtype=np.float32)
+a = np.ones((N, N, N, N, N), dtype=np.float32)
+b = np.ones((N, N, N, N, N), dtype=np.float32)
 
 print("a:", a, hex(a.ctypes.data))
 print("b:", b, hex(b.ctypes.data))
 
-c = f1(a,b)
+
+with dpctl.device_context("opencl:gpu:0"):
+    c = f1(a, b)
+
 print("BIG RESULT c:", c, hex(c.ctypes.data))
 
 for i in range(N):
     for j in range(N):
         for k in range(N):
             for l in range(N):
                 for m in range(N):
-                    if c[i,j,k,l,m] != 2.0:
+                    if c[i, j, k, l, m] != 2.0:
                         print("First index not equal to 2.0 was", i, j, k, l, m)
                         break
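
Each of the 2-D through 5-D examples above (and test1.py below) migrates the same way: the removed parallel={'offload':True} flag is replaced by calling the plain @njit function while a dpctl queue is active. A minimal standalone sketch of the pattern, not part of the patch; the filter string "opencl:gpu:0" is taken from the examples, and any filter supported by the local dpctl build should work:

import dpctl
import numpy as np
from numba import njit

@njit
def vec_add(a, b):
    return a + b

a = np.ones(1024, dtype=np.float32)
b = np.ones(1024, dtype=np.float32)

# Offload is now decided by the queue that is active at call time,
# not by a compile-time flag on @njit.
with dpctl.device_context("opencl:gpu:0"):
    c = vec_add(a, b)
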
diff --git a/numba_dppy/examples/pa_examples/test1.py b/numba_dppy/examples/pa_examples/test1.py
index 1620654cf8..01209b3309 100644
--- a/numba_dppy/examples/pa_examples/test1.py
+++ b/numba_dppy/examples/pa_examples/test1.py
@@ -1,8 +1,9 @@
 from numba import njit
 import numpy as np
+import dpctl
 
 
-@njit(parallel={'offload':True})
+@njit
 def f1(a, b):
     c = a + b
     return c
@@ -19,7 +20,10 @@ def main():
     print("a:", a, hex(a.ctypes.data))
     print("b:", b, hex(b.ctypes.data))
 
-    c = f1(a,b)
+
+    with dpctl.device_context("opencl:gpu:0"):
+        c = f1(a, b)
+
     print("RESULT c:", c, hex(c.ctypes.data))
 
     for i in range(N):
         if c[i] != 2.0:
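
The test-suite diffs that follow apply the same migration and route every jitted call through dpctl.device_context while the NumPy reference runs on the host. As a hedged illustration of how the helpers defined in the next diff are driven (the helper signatures are taken from the diff below; the import path assumes numba_dppy's tests are importable as a package, and the concrete arguments are only examples):

import numpy as np
from numba import njit
from numba_dppy.tests.test_dpnp_functions import (
    test_for_different_datatypes, test_for_dimensions)

@njit
def f(a):
    return np.sum(a)

# Compiles f once per dtype, runs it under dpctl.device_context and
# compares against the eager host result of np.sum.
test_for_different_datatypes(f, np.sum, [10], 1, [np.float32, np.float64])
test_for_dimensions(f, np.sum, [10, 2, 3], [np.float32])
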
diff --git a/numba_dppy/tests/test_dpnp_functions.py b/numba_dppy/tests/test_dpnp_functions.py
index b0837f5ba6..c4749885ba 100644
--- a/numba_dppy/tests/test_dpnp_functions.py
+++ b/numba_dppy/tests/test_dpnp_functions.py
@@ -5,7 +5,9 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
@@ -14,10 +16,14 @@ def test_for_different_datatypes(fn, test_fn, dims, arg_count, tys, np_all=False
     if arg_count == 1:
         for ty in tys:
             if matrix and matrix[0]:
-                a = np.array(np.random.random(dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
+                a = np.array(np.random.random(
+                    dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
             else:
                 a = np.array(np.random.random(dims[0]), dtype=ty)
-            c = fn(a)
+
+            with dpctl.device_context("opencl:gpu"):
+                c = fn(a)
+
             d = test_fn(a)
             if np_all:
                 max_abs_err = np.all(c - d)
@@ -29,15 +35,19 @@ def test_for_different_datatypes(fn, test_fn, dims, arg_count, tys, np_all=False
     elif arg_count == 2:
         for ty in tys:
             if matrix and matrix[0]:
-                a = np.array(np.random.random(dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
+                a = np.array(np.random.random(
+                    dims[0] * dims[1]), dtype=ty).reshape(dims[0], dims[1])
             else:
                 a = np.array(np.random.random(dims[0] * dims[1]), dtype=ty)
             if matrix and matrix[1]:
-                b = np.array(np.random.random(dims[2] * dims[3]), dtype=ty).reshape(dims[2], dims[3])
+                b = np.array(np.random.random(
+                    dims[2] * dims[3]), dtype=ty).reshape(dims[2], dims[3])
             else:
                 b = np.array(np.random.random(dims[2] * dims[3]), dtype=ty)
-            c = fn(a, b)
+            with dpctl.device_context("opencl:gpu"):
+                c = fn(a, b)
+
             d = test_fn(a, b)
             if np_all:
                 max_abs_err = np.sum(c - d)
@@ -48,6 +58,7 @@
     return True
 
+
 def test_for_dimensions(fn, test_fn, dims, tys, np_all=False):
     total_size = 1
     for d in dims:
@@ -55,7 +66,10 @@ def test_for_dimensions(fn, test_fn, dims, tys, np_all=False):
 
     for ty in tys:
         a = np.array(np.random.random(total_size), dtype=ty).reshape(dims)
-        c = fn(a)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = fn(a)
+
         d = test_fn(a)
         if np_all:
             max_abs_err = np.all(c - d)
@@ -66,6 +80,7 @@
     return True
 
+
 def ensure_dpnp():
     try:
         # import dpnp
         from numba_dppy.dpnp_glue import dpnp_fptr_interface as dpnp_glue
@@ -75,8 +90,9 @@ def ensure_dpnp():
         return True
     except:
         return False
 
-@unittest.skipUnless(ensure_dpnp(), 'test only when dpNP is available')
+@unittest.skipUnless(ensure_dpnp() and dpctl.has_gpu_queues(), 'test only when dpNP and GPU are available')
 class Testdpnp_functions(DPPYTestCase):
+
     N = 10
 
     a = np.array(np.random.random(N), dtype=np.float32)
@@ -84,123 +100,140 @@ class Testdpnp_functions(DPPYTestCase):
     tys = [np.int32, np.uint32, np.int64, np.uint64, np.float, np.double]
 
     def test_sum(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sum(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.sum, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.sum, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.sum, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.sum, [10, 2, 3], self.tys))
 
     def test_prod(self):
-        @njit(parallel={'offload':True})
+        @njit
        def f(a):
             c = np.prod(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.prod, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.prod, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.prod, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.prod, [10, 2, 3], self.tys))
 
     def test_argmax(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.argmax(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.argmax, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.argmax, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.argmax, [10, 2], self.tys))
-        self.assertTrue(test_for_dimensions(f, np.argmax, [10, 2, 3], self.tys))
+        self.assertTrue(test_for_dimensions(
+            f, np.argmax, [10, 2, 3], self.tys))
 
     def test_max(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.max(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.max, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.max, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.max, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.max, [10, 2, 3], self.tys))
 
     def test_argmin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.argmin(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.argmin, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.argmin, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.argmin, [10, 2], self.tys))
-        self.assertTrue(test_for_dimensions(f, np.argmin, [10, 2, 3], self.tys))
+        self.assertTrue(test_for_dimensions(
+            f, np.argmin, [10, 2, 3], self.tys))
 
     def test_min(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.min(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.min, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.min, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.min, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.min, [10, 2, 3], self.tys))
 
     def test_argsort(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.argsort(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.argmin, [10], 1, self.tys, np_all=True))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.argmin, [10], 1, self.tys, np_all=True))
 
     def test_median(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.median(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.median, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.median, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.median, [10, 2], self.tys))
-        self.assertTrue(test_for_dimensions(f, np.median, [10, 2, 3], self.tys))
+        self.assertTrue(test_for_dimensions(
+            f, np.median, [10, 2, 3], self.tys))
 
     def test_mean(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.mean(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.mean, [10], 1, self.tys))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.mean, [10], 1, self.tys))
         self.assertTrue(test_for_dimensions(f, np.mean, [10, 2], self.tys))
         self.assertTrue(test_for_dimensions(f, np.mean, [10, 2, 3], self.tys))
 
     def test_matmul(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.matmul(a, b)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.matmul, [10, 5, 5, 10], 2, [np.float, np.double], np_all=True, matrix=[True, True]))
+        self.assertTrue(test_for_different_datatypes(f, np.matmul, [10, 5, 5, 10], 2, [
+            np.float, np.double], np_all=True, matrix=[True, True]))
 
     def test_dot(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.dot(a, b)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 1, 10, 1], 2, [np.float, np.double]))
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 1, 10, 2], 2, [np.float, np.double], matrix=[False, True], np_all=True))
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [2, 10, 10, 1], 2, [np.float, np.double], matrix=[True, False], np_all=True))
-        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 2, 2, 10], 2, [np.float, np.double], matrix=[True, True], np_all=True))
-
+        self.assertTrue(test_for_different_datatypes(
+            f, np.dot, [10, 1, 10, 1], 2, [np.float, np.double]))
+        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 1, 10, 2], 2, [
+            np.float, np.double], matrix=[False, True], np_all=True))
+        self.assertTrue(test_for_different_datatypes(f, np.dot, [2, 10, 10, 1], 2, [
+            np.float, np.double], matrix=[True, False], np_all=True))
+        self.assertTrue(test_for_different_datatypes(f, np.dot, [10, 2, 2, 10], 2, [
+            np.float, np.double], matrix=[True, True], np_all=True))
 
     def test_cov(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.cov(a)
             return c
 
-        self.assertTrue(test_for_different_datatypes(f, np.cov, [10, 7], 1, self.tys, matrix=[True], np_all=True))
+        self.assertTrue(test_for_different_datatypes(
+            f, np.cov, [10, 7], 1, self.tys, matrix=[True], np_all=True))
 
     def test_dpnp_interacting_with_parfor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.sum(a)
             e = np.add(b, a)
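
Note the new guard on the class: the suite is skipped unless dpnp is importable and dpctl reports at least one GPU queue. The same gating idiom works for any device-dependent test; a sketch under the assumption that dpctl.has_gpu_queues() is available, as in the diff above (the class and method names here are hypothetical):

import unittest
import dpctl

@unittest.skipUnless(dpctl.has_gpu_queues(),
                     'test only when a GPU queue is available')
class MyDeviceTest(unittest.TestCase):
    def test_something(self):
        with dpctl.device_context("opencl:gpu"):
            pass  # run the offloaded code under test here
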
diff --git a/numba_dppy/tests/test_dppl_fallback.py b/numba_dppy/tests/test_dppl_fallback.py
index 8519f4fb14..76792f5744 100644
--- a/numba_dppy/tests/test_dppl_fallback.py
+++ b/numba_dppy/tests/test_dppl_fallback.py
@@ -3,7 +3,9 @@
 import numpy as np
 import numba
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 from numba.tests.support import captured_stderr
@@ -28,28 +30,30 @@ def inner_call_fallback():
 
             return a
 
-        with captured_stderr() as msg:
-            dppy = numba.njit(parallel={'offload':True})(inner_call_fallback)
+        with captured_stderr() as msg, dpctl.device_context("opencl:gpu"):
+            dppy = numba.njit(inner_call_fallback)
             dppy_result = dppy()
 
         ref_result = inner_call_fallback()
 
         np.testing.assert_array_equal(dppy_result, ref_result)
-        self.assertTrue('Failed to lower parfor on DPPY-device' in msg.getvalue())
+        self.assertTrue(
+            'Failed to lower parfor on DPPY-device' in msg.getvalue())
 
     def test_dppy_fallback_reductions(self):
         def reduction(a):
             return np.amax(a)
 
         a = np.ones(10)
-        with captured_stderr() as msg:
-            dppy = numba.njit(parallel={'offload':True})(reduction)
+        with captured_stderr() as msg, dpctl.device_context("opencl:gpu"):
+            dppy = numba.njit(reduction)
             dppy_result = dppy(a)
 
         ref_result = reduction(a)
 
         np.testing.assert_array_equal(dppy_result, ref_result)
-        self.assertTrue('Failed to lower parfor on DPPY-device' in msg.getvalue())
+        self.assertTrue(
+            'Failed to lower parfor on DPPY-device' in msg.getvalue())
 
 
 if __name__ == '__main__':
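
The fallback tests above also show the combined context-manager form: stderr capture and the device context are entered in a single with statement, and the test asserts both that results match the pure-Python reference and that the fallback warning was emitted. A condensed sketch of that pattern, using only names that appear in the diff (captured_stderr comes from numba.tests.support):

import numpy as np
import numba
import dpctl
from numba.tests.support import captured_stderr

def reduction(a):
    return np.amax(a)

a = np.ones(10)
with captured_stderr() as msg, dpctl.device_context("opencl:gpu"):
    jitted = numba.njit(reduction)
    result = jitted(a)

# The reduction parfor cannot be lowered for the device, so numba-dppy
# falls back to the CPU pipeline and warns on stderr.
assert 'Failed to lower parfor on DPPY-device' in msg.getvalue()
np.testing.assert_array_equal(result, reduction(a))
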
diff --git a/numba_dppy/tests/test_numpy_bit_twiddling_functions.py b/numba_dppy/tests/test_numpy_bit_twiddling_functions.py
index de6b7bc963..8d022a0bb1 100644
--- a/numba_dppy/tests/test_numpy_bit_twiddling_functions.py
+++ b/numba_dppy/tests/test_numpy_bit_twiddling_functions.py
@@ -5,104 +5,115 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_bit_twiddling_functions(DPPYTestCase):
     def test_bitwise_and(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.bitwise_and(a, b)
             return c
 
-        a = np.array([2,5,255])
-        b = np.array([3,14,16])
+        a = np.array([2, 5, 255])
+        b = np.array([3, 14, 16])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.bitwise_and(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_bitwise_or(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.bitwise_or(a, b)
             return c
 
-        a = np.array([2,5,255])
-        b = np.array([4,4,4])
+        a = np.array([2, 5, 255])
+        b = np.array([4, 4, 4])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.bitwise_or(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_bitwise_xor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.bitwise_xor(a, b)
             return c
 
-        a = np.array([2,5,255])
-        b = np.array([4,4,4])
+        a = np.array([2, 5, 255])
+        b = np.array([4, 4, 4])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.bitwise_xor(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_bitwise_not(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.bitwise_not(a)
             return c
 
-        a = np.array([2,5,255])
+        a = np.array([2, 5, 255])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a)
 
-        c = f(a)
         d = np.bitwise_not(a)
         self.assertTrue(np.all(c == d))
 
-
     def test_invert(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.invert(a)
             return c
 
-        a = np.array([2,5,255])
+        a = np.array([2, 5, 255])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a)
 
-        c = f(a)
         d = np.invert(a)
         self.assertTrue(np.all(c == d))
 
-
     def test_left_shift(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.left_shift(a, b)
             return c
 
-        a = np.array([2,3,4])
-        b = np.array([1,2,3])
+        a = np.array([2, 3, 4])
+        b = np.array([1, 2, 3])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.left_shift(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_right_shift(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.right_shift(a, b)
             return c
 
-        a = np.array([2,3,4])
-        b = np.array([1,2,3])
+        a = np.array([2, 3, 4])
+        b = np.array([1, 2, 3])
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.right_shift(a, b)
         self.assertTrue(np.all(c == d))
diff --git a/numba_dppy/tests/test_numpy_comparison_functions.py b/numba_dppy/tests/test_numpy_comparison_functions.py
index 5daf1fc813..53a8eed890 100644
--- a/numba_dppy/tests/test_numpy_comparison_functions.py
+++ b/numba_dppy/tests/test_numpy_comparison_functions.py
@@ -5,81 +5,92 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_comparison_functions(DPPYTestCase):
-    a = np.array([4,5,6])
-    b = np.array([2,6,6])
+    a = np.array([4, 5, 6])
+    b = np.array([2, 6, 6])
+
     def test_greater(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.greater(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.greater(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_greater_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.greater_equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.greater_equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_less(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.less(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.less(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_less_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.less_equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.less_equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_not_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.not_equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.not_equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_equal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.equal(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.equal(self.a, self.b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_and(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.logical_and(a, b)
             return c
@@ -87,13 +98,14 @@ def f(a, b):
         a = np.array([True, True, False])
         b = np.array([True, False, False])
 
-        c = f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
+
         d = np.logical_and(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_or(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.logical_or(a, b)
             return c
@@ -101,13 +113,14 @@ def f(a, b):
         a = np.array([True, True, False])
         b = np.array([True, False, False])
 
-        c = f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
+
         d = np.logical_or(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_xor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.logical_xor(a, b)
             return c
@@ -115,76 +128,83 @@ def f(a, b):
         a = np.array([True, True, False])
         b = np.array([True, False, False])
 
-        c = f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
+
         d = np.logical_xor(a, b)
         self.assertTrue(np.all(c == d))
 
-
     def test_logical_not(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.logical_not(a)
             return c
 
         a = np.array([True, True, False])
 
-        c = f(a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a)
+
         d = np.logical_not(a)
         self.assertTrue(np.all(c == d))
 
-
     def test_maximum(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.maximum(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.maximum(a, b)
         np.testing.assert_equal(c, d)
 
-
     def test_minimum(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.minimum(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.minimum(a, b)
         np.testing.assert_equal(c, d)
 
-
     def test_fmax(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.fmax(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.fmax(a, b)
         np.testing.assert_equal(c, d)
 
-
     def test_fmin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.fmin(a, b)
             return c
 
-        a = np.array([5,6,7,np.nan], dtype=np.float32)
-        b = np.array([5,7,6,100], dtype=np.float32)
+        a = np.array([5, 6, 7, np.nan], dtype=np.float32)
+        b = np.array([5, 7, 6, 100], dtype=np.float32)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(a, b)
 
-        c = f(a, b)
         d = np.fmin(a, b)
         np.testing.assert_equal(c, d)
diff --git a/numba_dppy/tests/test_numpy_floating_functions.py b/numba_dppy/tests/test_numpy_floating_functions.py
index c05c10498d..fb7c1b98e8 100644
--- a/numba_dppy/tests/test_numpy_floating_functions.py
+++ b/numba_dppy/tests/test_numpy_floating_functions.py
@@ -4,92 +4,102 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_floating_functions(DPPYTestCase):
     def test_isfinite(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.isfinite(a)
             return c
 
-        test_arr = [np.log(-1.),1.,np.log(0)]
+        test_arr = [np.log(-1.), 1., np.log(0)]
         input_arr = np.asarray(test_arr, dtype=np.float32)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.isfinite(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_isinf(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.isinf(a)
             return c
 
-        test_arr = [np.log(-1.),1.,np.log(0)]
+        test_arr = [np.log(-1.), 1., np.log(0)]
         input_arr = np.asarray(test_arr, dtype=np.float32)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.isinf(input_arr)
         self.assertTrue(np.all(c == d))
 
     def test_isnan(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.isnan(a)
             return c
 
-        test_arr = [np.log(-1.),1.,np.log(0)]
+        test_arr = [np.log(-1.), 1., np.log(0)]
         input_arr = np.asarray(test_arr, dtype=np.float32)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.isnan(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_floor(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.floor(a)
             return c
 
         input_arr = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.floor(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_ceil(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.ceil(a)
             return c
 
         input_arr = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.ceil(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_trunc(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.trunc(a)
             return c
 
         input_arr = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.trunc(input_arr)
         self.assertTrue(np.all(c == d))
 
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/numba_dppy/tests/test_numpy_math_functions.py b/numba_dppy/tests/test_numpy_math_functions.py
index 155b352c7e..7af014d4d8 100644
--- a/numba_dppy/tests/test_numpy_math_functions.py
+++ b/numba_dppy/tests/test_numpy_math_functions.py
@@ -5,79 +5,95 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_math_functions(DPPYTestCase):
+
     N = 10
 
     a = np.array(np.random.random(N), dtype=np.float32)
     b = np.array(np.random.random(N), dtype=np.float32)
 
     def test_add(self):
-        @njit(parallel={'offload':True})
+        @njit
        def f(a, b):
             c = np.add(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a + self.b
         self.assertTrue(np.all(c == d))
 
     def test_subtract(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.subtract(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a - self.b
         self.assertTrue(np.all(c == d))
 
     def test_multiply(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.multiply(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a * self.b
         self.assertTrue(np.all(c == d))
 
     def test_divide(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.divide(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = self.a / self.b
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_true_divide(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.true_divide(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.true_divide(self.a, self.b)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_negative(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.negative(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         self.assertTrue(np.all(c == -self.a))
 
     def test_power(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.power(a, b)
             return c
@@ -85,11 +101,13 @@ def f(a, b):
         input_arr = np.random.randint(self.N, size=(self.N))
         exp = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, exp)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, exp)
+
         self.assertTrue(np.all(c == input_arr * input_arr))
 
     def test_remainder(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.remainder(a, b)
             return c
@@ -97,11 +115,13 @@ def f(a, b):
         input_arr = np.full((self.N), 3, dtype=np.int)
         divisor = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, divisor)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, divisor)
+
         self.assertTrue(np.all(c == 1))
 
     def test_mod(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.mod(a, b)
             return c
@@ -109,11 +129,13 @@ def f(a, b):
         input_arr = np.full((self.N), 3, dtype=np.int)
         divisor = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, divisor)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, divisor)
+
         self.assertTrue(np.all(c == 1))
 
     def test_fmod(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.fmod(a, b)
             return c
@@ -121,173 +143,201 @@ def f(a, b):
         input_arr = np.full((self.N), 3, dtype=np.float32)
         divisor = np.full((self.N), 2, dtype=np.int)
 
-        c = f(input_arr, divisor)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr, divisor)
+
         self.assertTrue(np.all(c == 1.))
 
     def test_abs(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.abs(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         self.assertTrue(np.all(c == -input_arr))
 
     def test_absolute(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.absolute(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
-        self.assertTrue(np.all(c == -input_arr))
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+            self.assertTrue(np.all(c == -input_arr))
 
     def test_fabs(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.fabs(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
-        self.assertTrue(np.all(c == -input_arr))
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+            self.assertTrue(np.all(c == -input_arr))
 
     def test_sign(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sign(a)
             return c
 
         input_arr = 5 * np.random.random_sample(self.N) - 5
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         self.assertTrue(np.all(c == -1.))
 
     def test_conj(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.conj(a)
             return c
 
         input_arr = np.eye(self.N) + 1j * np.eye(self.N)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.conj(input_arr)
         self.assertTrue(np.all(c == d))
 
     def test_exp(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.exp(a)
             return c
 
         input_arr = np.random.randint(self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.exp(input_arr)
         self.assertTrue(np.all(c == d))
 
-
     def test_log(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.log(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.log(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_log10(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.log10(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.log10(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_expm1(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.expm1(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.expm1(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_log1p(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
            c = np.log1p(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.log1p(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
     def test_sqrt(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sqrt(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.sqrt(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_square(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.square(a)
             return c
 
         input_arr = np.random.randint(self.N, size=(self.N))
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         self.assertTrue(np.all(c == input_arr * input_arr))
 
     def test_reciprocal(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.reciprocal(a)
             return c
 
-        input_arr = 5 * np.random.random_sample(self.N) + 5
+        input_arr = 5 * np.random.random_sample(self.N) + 5
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
 
-        c = f(input_arr)
         self.assertTrue(np.all(c == 1/input_arr))
 
     def test_conjugate(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.conjugate(a)
             return c
 
         input_arr = np.eye(self.N) + 1j * np.eye(self.N)
 
-        c = f(input_arr)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.conj(input_arr)
         self.assertTrue(np.all(c == d))
diff --git a/numba_dppy/tests/test_numpy_trigonomteric_functions.py b/numba_dppy/tests/test_numpy_trigonomteric_functions.py
index 7ce18b870a..361273cdee 100644
--- a/numba_dppy/tests/test_numpy_trigonomteric_functions.py
+++ b/numba_dppy/tests/test_numpy_trigonomteric_functions.py
@@ -5,214 +5,239 @@
 import sys
 import numpy as np
 from numba import njit
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
 class TestNumpy_math_functions(DPPYTestCase):
+
     N = 10
 
     a = np.array(np.random.random(N), dtype=np.float32)
     b = np.array(np.random.random(N), dtype=np.float32)
 
     def test_sin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sin(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.sin(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_cos(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.cos(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.cos(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_tan(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.tan(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.tan(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arcsin(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arcsin(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arcsin(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arccos(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arccos(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arccos(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
    def test_arctan(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arctan(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arctan(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arctan2(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             c = np.arctan2(a, b)
             return c
 
-        c = f(self.a, self.b)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a, self.b)
+
         d = np.arctan2(self.a, self.b)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_sinh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.sinh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.sinh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_cosh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.cosh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.cosh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_tanh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.tanh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.tanh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arcsinh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arcsinh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arcsinh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arccosh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arccosh(a)
             return c
 
         input_arr = np.random.randint(1, self.N, size=(self.N))
 
-        c = f(input_arr)
+
+        with dpctl.device_context("opencl:gpu"):
+            c = f(input_arr)
+
         d = np.arccosh(input_arr)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_arctanh(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.arctanh(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.arctanh(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_deg2rad(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.deg2rad(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.deg2rad(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
 
-
     def test_rad2deg(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.rad2deg(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.rad2deg(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_degrees(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.degrees(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.degrees(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-2)
 
     def test_radians(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a):
             c = np.radians(a)
             return c
 
-        c = f(self.a)
+        with dpctl.device_context("opencl:gpu"):
+            c = f(self.a)
+
         d = np.radians(self.a)
         max_abs_err = c.sum() - d.sum()
         self.assertTrue(max_abs_err < 1e-5)
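
The remaining diffs cover prange loops. Under the new scheme a prange loop offloads exactly like an array expression: decorate with plain @njit and call inside a device context. A minimal sketch, not part of the patch, mirroring the pattern the prange tests below use (preallocated arrays are passed in, which sidesteps the NRT limitation one of those tests is skipped for):

import numpy as np
import dpctl
from numba import njit, prange

@njit
def scale(a, b):
    # Each iteration is independent, so the loop can be lowered
    # as a parfor for the active device.
    for i in prange(a.shape[0]):
        b[i] = a[i] * 10

a = np.ones(64)
b = np.ones(64)
with dpctl.device_context("opencl:gpu"):
    scale(a, b)
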
diff --git a/numba_dppy/tests/test_parfor_lower_message.py b/numba_dppy/tests/test_parfor_lower_message.py
index 591fd2cb0e..9f4660e01f 100644
--- a/numba_dppy/tests/test_parfor_lower_message.py
+++ b/numba_dppy/tests/test_parfor_lower_message.py
@@ -1,8 +1,10 @@
 import numpy as np
 import numba
 from numba import njit, prange
-import numba_dppy, numba_dppy as dppy
-from numba_dppy.testing import unittest, DPPYTestCase
+import numba_dppy
+import numba_dppy as dppy
+from numba_dppy.testing import unittest
+from numba_dppy.testing import DPPYTestCase
 from numba.tests.support import captured_stdout
 import dpctl
 
@@ -23,7 +25,7 @@ class TestParforMessage(DPPYTestCase):
     def test_parfor_message(self):
         with dpctl.device_context("opencl:gpu") as gpu_queue:
             numba_dppy.compiler.DEBUG = 1
-            jitted = njit(parallel={"offload": True})(prange_example)
+            jitted = njit(prange_example)
 
             with captured_stdout() as got:
                 jitted()
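
test_parfor_message above, and the prange tests that follow, verify offload by counting compiler diagnostics: with numba_dppy.compiler.DEBUG set, each parfor lowered for the device prints 'Parfor lowered on DPPY-device'. A hedged sketch of that verification idiom, using only names that appear in these diffs (the scale function is the hypothetical example from the previous note):

import numpy as np
import numba_dppy
import dpctl
from numba import njit, prange
from numba.tests.support import captured_stdout

@njit
def scale(a, b):
    for i in prange(a.shape[0]):
        b[i] = a[i] * 2.0

a = np.ones(1024)
b = np.zeros(1024)

old_debug = numba_dppy.compiler.DEBUG
numba_dppy.compiler.DEBUG = 1
try:
    with captured_stdout() as stdout, dpctl.device_context("opencl:gpu"):
        scale(a, b)
finally:
    numba_dppy.compiler.DEBUG = old_debug

# One diagnostic line per parfor that was actually offloaded.
assert stdout.getvalue().count('Parfor lowered on DPPY-device') >= 1
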
diff --git a/numba_dppy/tests/test_prange.py b/numba_dppy/tests/test_prange.py
index f4c13c4b1f..3a8948d716 100644
--- a/numba_dppy/tests/test_prange.py
+++ b/numba_dppy/tests/test_prange.py
@@ -5,16 +5,19 @@
 import sys
 import numpy as np
 import numba
+import dpctl
 from numba import njit, prange
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
 from numba_dppy.testing import unittest, expectedFailureIf
 from numba_dppy.testing import DPPYTestCase
 from numba.tests.support import captured_stdout
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), "test only on GPU system")
 class TestPrange(DPPYTestCase):
     def test_one_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             for i in prange(4):
                 b[i, 0] = a[i, 0] * 10
@@ -24,14 +27,14 @@ def f(a, b):
         a = np.ones((m, n))
         b = np.ones((m, n))
 
-        f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
 
         for i in range(4):
             self.assertTrue(b[i, 0] == a[i, 0] * 10)
 
-
     def test_nested_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             # dimensions must be provided as scalar
             m, n = a.shape
@@ -44,12 +47,13 @@ def f(a, b):
         a = np.ones((m, n))
         b = np.ones((m, n))
 
-        f(a, b)
-        self.assertTrue(np.all(b == 10))
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
+            self.assertTrue(np.all(b == 10))
 
     def test_multiple_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             # dimensions must be provided as scalar
             m, n = a.shape
@@ -58,7 +62,6 @@ def f(a, b):
             for j in prange(n):
                 b[i, j] = a[i, j] * val
 
-
             for i in prange(m):
                 for j in prange(n):
                     a[i, j] = a[i, j] * 10
@@ -68,13 +71,14 @@ def f(a, b):
         a = np.ones((m, n))
         b = np.ones((m, n))
 
-        f(a, b)
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
+
         self.assertTrue(np.all(b == 10))
         self.assertTrue(np.all(a == 10))
 
-
     def test_three_prange(self):
-        @njit(parallel={'offload':True})
+        @njit
         def f(a, b):
             # dimensions must be provided as scalar
             m, n, o = a.shape
@@ -91,9 +95,10 @@ def f(a, b):
         a = np.ones((m, n, o))
         b = np.ones((m, n, o))
 
-        f(a, b)
-        self.assertTrue(np.all(b == 12))
+        with dpctl.device_context("opencl:gpu"):
+            f(a, b)
+            self.assertTrue(np.all(b == 12))
 
     @expectedFailureIf(sys.platform.startswith('win'))
     def test_two_consequent_prange(self):
@@ -110,19 +115,21 @@ def prange_example():
         old_debug = numba_dppy.compiler.DEBUG
         numba_dppy.compiler.DEBUG = 1
 
-        jitted = njit(parallel={'offload':True})(prange_example)
-        with captured_stdout() as stdout:
+        jitted = njit(prange_example)
+
+        with captured_stdout() as stdout, dpctl.device_context("opencl:gpu"):
             jitted_res = jitted()
 
         res = prange_example()
 
         numba_dppy.compiler.DEBUG = old_debug
 
-        self.assertEqual(stdout.getvalue().count('Parfor lowered on DPPY-device'), 2, stdout.getvalue())
-        self.assertEqual(stdout.getvalue().count('Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Parfor lowered on DPPY-device'), 2, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
 
         np.testing.assert_equal(res, jitted_res)
 
-
     @unittest.skip('NRT required but not enabled')
     def test_2d_arrays(self):
         def prange_example():
@@ -138,16 +145,19 @@ def prange_example():
         old_debug = numba_dppy.compiler.DEBUG
         numba_dppy.compiler.DEBUG = 1
 
-        jitted = njit(parallel={'offload':True})(prange_example)
-        with captured_stdout() as stdout:
+        jitted = njit(prange_example)
+
+        with captured_stdout() as stdout, dpctl.device_context("opencl:gpu"):
             jitted_res = jitted()
 
         res = prange_example()
 
         numba_dppy.compiler.DEBUG = old_debug
 
-        self.assertEqual(stdout.getvalue().count('Parfor lowered on DPPY-device'), 2, stdout.getvalue())
-        self.assertEqual(stdout.getvalue().count('Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Parfor lowered on DPPY-device'), 2, stdout.getvalue())
+        self.assertEqual(stdout.getvalue().count(
+            'Failed to lower parfor on DPPY-device'), 0, stdout.getvalue())
 
         np.testing.assert_equal(res, jitted_res)
diff --git a/numba_dppy/tests/test_vectorize.py b/numba_dppy/tests/test_vectorize.py
index 04891ca296..2fed0fc65f 100644
--- a/numba_dppy/tests/test_vectorize.py
+++ b/numba_dppy/tests/test_vectorize.py
@@ -5,11 +5,14 @@
 import sys
 import numpy as np
 from numba import njit, vectorize
-import numba_dppy, numba_dppy as dppy
+import numba_dppy
+import numba_dppy as dppy
+import dpctl
 from numba_dppy.testing import unittest
 from numba_dppy.testing import DPPYTestCase
 
 
+@unittest.skipUnless(dpctl.has_gpu_queues(), "test only on GPU system")
 class TestVectorize(DPPYTestCase):
     def test_vectorize(self):
 
@@ -17,9 +20,9 @@ def test_vectorize(self):
         def axy(a, x, y):
             return a * x + y
 
-        @njit(parallel={'offload':True})
+        @njit
         def f(a0, a1):
-            return np.cos(axy(a0, np.sin(a1) - 1., 1.) )
+            return np.cos(axy(a0, np.sin(a1) - 1., 1.))
 
         def f_np(a0, a1):
             sin_res = np.sin(a1)
@@ -28,11 +31,12 @@ def f_np(a0, a1):
                 res.append(axy(a0[i], sin_res[i] - 1., 1.))
             return np.cos(np.array(res))
 
-
         A = np.random.random(10)
         B = np.random.random(10)
 
-        expected = f(A, B)
+        with dpctl.device_context("opencl:gpu"):
+            expected = f(A, B)
+
         actual = f_np(A, B)
 
         max_abs_err = expected.sum() - actual.sum()
diff --git a/numba_dppy/tests/test_with_context.py b/numba_dppy/tests/test_with_context.py
index e025a77784..1f733829b6 100644
--- a/numba_dppy/tests/test_with_context.py
+++ b/numba_dppy/tests/test_with_context.py
@@ -12,7 +12,6 @@ class TestWithDPPYContext(DPPYTestCase):
 
     @unittest.skipIf(not dpctl.has_gpu_queues(), "No GPU platforms available")
-    @expectedFailureIf(sys.platform.startswith('win'))
     def test_with_dppy_context_gpu(self):
 
         @njit
@@ -39,7 +38,6 @@ def func(b):
         self.assertTrue('Parfor lowered on DPPY-device' in got_gpu_message.getvalue())
 
     @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available")
-    @unittest.expectedFailure
     def test_with_dppy_context_cpu(self):
 
         @njit