Skip to content
45 changes: 45 additions & 0 deletions numba_dppy/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from contextlib import contextmanager

import dpctl
from numba._dispatcher import set_use_tls_target_stack
from numba.core.dispatcher import TargetConfig

from numba_dppy.dppy_offload_dispatcher import DppyOffloadDispatcher


@contextmanager
def switch_target(retarget):
# __enter__
tc = TargetConfig()
tc.push(retarget)
set_use_tls_target_stack(True)
yield
# __exit__
tc.pop()
set_use_tls_target_stack(False)


def retarget_to_gpu(cpu_disp):
dispatcher = DppyOffloadDispatcher(cpu_disp.py_func)
return lambda *args, **kwargs: dispatcher(*args, **kwargs)


@contextmanager
def device_context(*args, **kwargs):
with switch_target(retarget_to_gpu):
with dpctl.device_context(*args, **kwargs) as queue:
yield queue
2 changes: 1 addition & 1 deletion numba_dppy/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# from numba.npyufunc.deviceufunc import (UFuncMechanism, GenerializedUFunc,
# GUFuncCallSteps)

from .. import dispatcher, utils, typing
from numba.core import dispatcher, utils, typing
from .compiler import DPPYCompiler


Expand Down
1 change: 1 addition & 0 deletions numba_dppy/dppy_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def run_pass(self, state):
state.typingctx,
state.flags.auto_parallel,
state.flags,
state.metadata,
state.parfor_diagnostics,
)

Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/tests/kernel_tests/test_arg_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import numpy as np
import numba_dppy as dppy
import pytest
import dpctl
from numba_dppy.context import device_context
from numba_dppy.tests.skip_tests import skip_test


Expand Down Expand Up @@ -77,6 +77,6 @@ def test_kernel_arg_accessor(filter_str, input_arrays, kernel):

a, b, actual = input_arrays
expected = a + b
with dpctl.device_context(filter_str):
with device_context(filter_str):
call_kernel(global_size, local_size, a, b, actual, kernel)
np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
6 changes: 3 additions & 3 deletions numba_dppy/tests/kernel_tests/test_arg_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import numpy as np
import numba_dppy as dppy
import pytest
import dpctl
from numba_dppy.context import device_context
from numba_dppy.tests.skip_tests import skip_test

global_size = 1054
Expand Down Expand Up @@ -63,7 +63,7 @@ def test_kernel_arg_types(filter_str, input_arrays):
kernel = dppy.kernel(mul_kernel)
a, actual, c = input_arrays
expected = a * c
with dpctl.device_context(filter_str):
with device_context(filter_str):
kernel[global_size, local_size](a, actual, c)
np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)

Expand All @@ -82,7 +82,7 @@ def test_bool_type(filter_str):
kernel = dppy.kernel(check_bool_kernel)
a = np.array([2], np.int64)

with dpctl.device_context(filter_str):
with device_context(filter_str):
kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, True)
assert a[0] == 111
kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, False)
Expand Down
9 changes: 5 additions & 4 deletions numba_dppy/tests/kernel_tests/test_atomic_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

import numba_dppy as dppy
import pytest
import dpctl

from numba_dppy.context import device_context


global_size = 100
Expand Down Expand Up @@ -94,7 +95,7 @@ def test_kernel_atomic_simple(filter_str, input_arrays, kernel_result_pair):

a, dtype = input_arrays
kernel, expected = kernel_result_pair
with dpctl.device_context(filter_str):
with device_context(filter_str):
kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
assert a[0] == expected

Expand All @@ -120,7 +121,7 @@ def test_kernel_atomic_local(filter_str, input_arrays, return_list_of_op):
a, dtype = input_arrays
op_type, expected = return_list_of_op
kernel = get_kernel_local(op_type, dtype)
with dpctl.device_context(filter_str):
with device_context(filter_str):
kernel[global_size, global_size](a)
assert a[0] == expected

Expand Down Expand Up @@ -162,6 +163,6 @@ def test_kernel_atomic_multi_dim(
dim = return_list_of_dim
kernel = get_kernel_multi_dim(op_type, len(dim))
a = np.zeros(dim, return_dtype)
with dpctl.device_context(filter_str):
with device_context(filter_str):
kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
assert a[0] == expected
8 changes: 4 additions & 4 deletions numba_dppy/tests/kernel_tests/test_barrier.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import numpy as np
import numba_dppy as dppy
import pytest
import dpctl
from numba_dppy.context import device_context
from numba_dppy.tests.skip_tests import skip_test


Expand Down Expand Up @@ -46,7 +46,7 @@ def twice(A):
arr = np.random.random(N).astype(np.float32)
orig = arr.copy()

with dpctl.device_context(filter_str) as gpu_queue:
with device_context(filter_str) as gpu_queue:
twice[N, N // 2](arr)

# The computation is correct?
Expand All @@ -69,7 +69,7 @@ def twice(A):
arr = np.random.random(N).astype(np.float32)
orig = arr.copy()

with dpctl.device_context(filter_str) as gpu_queue:
with device_context(filter_str) as gpu_queue:
twice[N, dppy.DEFAULT_LOCAL_SIZE](arr)

# The computation is correct?
Expand Down Expand Up @@ -97,7 +97,7 @@ def reverse_array(A):
arr = np.arange(blocksize).astype(np.float32)
orig = arr.copy()

with dpctl.device_context(filter_str) as gpu_queue:
with device_context(filter_str) as gpu_queue:
reverse_array[blocksize, blocksize](arr)

expected = orig[::-1] + orig
Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/tests/kernel_tests/test_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import numpy as np
import numba_dppy as dppy
import pytest
import dpctl
from numba_dppy.context import device_context
from numba_dppy.tests.skip_tests import skip_test

list_of_filter_strs = [
Expand Down Expand Up @@ -46,7 +46,7 @@ def test_caching_kernel(filter_str):
b = np.array(np.random.random(N), dtype=np.float32)
c = np.ones_like(a)

with dpctl.device_context(filter_str) as gpu_queue:
with device_context(filter_str) as gpu_queue:
func = dppy.kernel(data_parallel_sum)
caching_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(a, b, c)

Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/tests/kernel_tests/test_math_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import dpctl
from numba_dppy.context import device_context
import numba_dppy as dppy
import numpy as np
import pytest
Expand Down Expand Up @@ -67,7 +67,7 @@ def f(a, b):
i = dppy.get_global_id(0)
b[i] = uop(a[i])

with dpctl.device_context(filter_str):
with device_context(filter_str):
f[a.size, dppy.DEFAULT_LOCAL_SIZE](a, actual)

expected = np_uop(a)
Expand Down
8 changes: 4 additions & 4 deletions numba_dppy/tests/kernel_tests/test_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import numpy as np
import numba_dppy as dppy
import pytest
import dpctl
from numba_dppy.context import device_context
from numba_dppy.tests.skip_tests import skip_test

list_of_filter_strs = [
Expand All @@ -31,7 +31,7 @@ def filter_str(request):
@pytest.mark.xfail
def test_print_only_str(filter_str):
try:
with dpctl.device_context(filter_str):
with device_context(filter_str):
pass
except Exception:
pytest.skip()
Expand All @@ -45,7 +45,7 @@ def f():
# replaced by a puts() which fails due to lack of addrspace in the
# puts function signature right now, and would fail in general due
# to lack of support for puts() in OpenCL.
with dpctl.device_context(filter_str), captured_stdout() as stdout:
with device_context(filter_str), captured_stdout() as stdout:
f[3, dppy.DEFAULT_LOCAL_SIZE]()


Expand Down Expand Up @@ -75,7 +75,7 @@ def f(a):
a = input_arrays
global_size = 3

with dpctl.device_context(filter_str):
with device_context(filter_str):
f[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
captured = capfd.readouterr()
assert "test" in captured.out
8 changes: 4 additions & 4 deletions numba_dppy/tests/njit_tests/dpnp/test_numpy_array_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# limitations under the License.
################################################################################

import dpctl
from numba_dppy.context import device_context
import numpy as np
from numba import njit
import pytest
Expand Down Expand Up @@ -106,7 +106,7 @@ def test_unary_ops(filter_str, unary_op, input_array, capfd):
expected = np.empty(shape=a.shape, dtype=a.dtype)

f = njit(fn)
with dpctl.device_context(filter_str), dpnp_debug():
with device_context(filter_str), dpnp_debug():
actual = f(a)
captured = capfd.readouterr()
assert "dpnp implementation" in captured.out
Expand All @@ -130,7 +130,7 @@ def test_binary_op(filter_str, binary_op, input_array, dtype, get_shape, capfd):
expected = np.empty(shape=a.shape, dtype=a.dtype)

f = njit(fn)
with dpctl.device_context(filter_str), dpnp_debug():
with device_context(filter_str), dpnp_debug():
actual = f(a, dtype)
captured = capfd.readouterr()
assert "dpnp implementation" in captured.out
Expand Down Expand Up @@ -159,7 +159,7 @@ def test_full(filter_str, full_name, input_array, get_shape, capfd):
expected = np.empty(shape=a.shape, dtype=a.dtype)

f = njit(fn)
with dpctl.device_context(filter_str), dpnp_debug():
with device_context(filter_str), dpnp_debug():
actual = f(a, np.array([2]))
captured = capfd.readouterr()
assert "dpnp implementation" in captured.out
Expand Down
6 changes: 3 additions & 3 deletions numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# limitations under the License.
################################################################################

import dpctl
from numba_dppy.context import device_context
import numpy as np
from numba import njit
import pytest
Expand Down Expand Up @@ -103,7 +103,7 @@ def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd):
expected = np.empty(shape=a.shape, dtype=a.dtype)

f = njit(op)
with dpctl.device_context(filter_str), dpnp_debug():
with device_context(filter_str), dpnp_debug():
actual = f(a)
captured = capfd.readouterr()
assert "dpnp implementation" in captured.out
Expand Down Expand Up @@ -142,7 +142,7 @@ def test_take(filter_str, input_arrays, indices, capfd):
expected = np.empty(shape=a.shape, dtype=a.dtype)

f = njit(fn)
with dpctl.device_context(filter_str), dpnp_debug():
with device_context(filter_str), dpnp_debug():
actual = f(a, indices)
captured = capfd.readouterr()
assert "dpnp implementation" in captured.out
Expand Down
Loading