class DPPLLower(Lower):
    """Lowerer that attempts GPU lowering first and transparently falls
    back to CPU lowering when the GPU attempt fails."""

    def __init__(self, context, library, fndesc, func_ir, metadata=None):
        Lower.__init__(self, context, library, fndesc, func_ir, metadata)

        # Keep independent copies of the typing information so that a failed
        # GPU lowering attempt cannot corrupt the state used by the CPU path.
        fndesc_cpu = copy.copy(fndesc)
        fndesc_cpu.calltypes = fndesc.calltypes.copy()
        fndesc_cpu.typemap = fndesc.typemap.copy()

        cpu_context = (context.cpu_context
                       if isinstance(context, DPPLTargetContext) else context)
        self.gpu_lower = Lower(context, library, fndesc, func_ir.copy(),
                               metadata)
        self.cpu_lower = Lower(cpu_context, library, fndesc_cpu,
                               func_ir.copy(), metadata)

    def lower(self):
        """Lower on GPU first and, if that fails, lower on CPU.

        The flow is:
        1. Start lowering of the parent function.
        2. Try to lower the parfor on the GPU.
           2.a. Enter ``lower_parfor_rollback`` and prepare the function for
                GPU lowering - insert ``get_global_id``.
                2.a.a. Start lowering the parfor body - this point
                       (``DPPLLower.lower()``) is entered a second time.
                2.a.b. If lowering on GPU failed - try on CPU.
                2.a.c. Since ``get_global_id`` is NOT supported with the CPU
                       context - fail and raise an exception.
           2.b. ``lower_parfor_rollback`` catches the exception and restores
                the parfor body and other state to its initial state.
           2.c. ``lower_parfor_rollback`` re-raises so the exception is
                caught here (``DPPLLower.lower()``).
        3. Catch the exception and start parfor lowering with the CPU
           context.

        WARNING: this approach only works when no device-specific
        modifications were added to the parent function (the function
        containing the parfor).  If the parent function was patched with
        device-specific code, a different solution must be used.
        """
        try:
            lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
            self.gpu_lower.lower()
            self.base_lower = self.gpu_lower
        except Exception:
            # GPU lowering failed; redo the whole lowering on the CPU
            # context.  (A bare ``except:`` would also swallow
            # KeyboardInterrupt/SystemExit, so catch Exception only.)
            self.cpu_lower.lower()
            self.base_lower = self.cpu_lower
        finally:
            # Always restore the default parfor lowering extension so later
            # compilations are not affected by this attempt.
            lowering.lower_extensions[parfor.Parfor] = \
                numba.parfors.parfor_lowering._lower_parfor_parallel

        self.env = self.base_lower.env
        self.call_helper = self.base_lower.call_helper

    def create_cpython_wrapper(self, release_gil=False):
        # Delegate to whichever lowerer (GPU or CPU) actually succeeded.
        return self.base_lower.create_cpython_wrapper(release_gil)


def lower_parfor_rollback(lowerer, parfor):
    """Try to lower *parfor* for the GPU via ``_lower_parfor_gufunc``.

    On failure, emit a NumbaPerformanceWarning and re-raise so the caller
    (``DPPLLower.lower``) can fall back to the CPU path.  The parfor's
    mutable state is snapshotted up front and unconditionally restored so a
    failed GPU attempt leaves the parfor exactly as it was found.
    """
    cache_parfor_races = copy.copy(parfor.races)
    cache_parfor_params = copy.copy(parfor.params)
    cache_parfor_loop_body = copy.deepcopy(parfor.loop_body)
    cache_parfor_init_block = parfor.init_block.copy()
    cache_parfor_loop_nests = parfor.loop_nests.copy()

    try:
        _lower_parfor_gufunc(lowerer, parfor)
    except Exception:
        msg = ("Failed to lower parfor on GPU")
        warnings.warn(NumbaPerformanceWarning(msg, parfor.loc))
        # Bare ``raise`` preserves the original traceback.
        raise
    finally:
        parfor.params = cache_parfor_params
        parfor.loop_body = cache_parfor_loop_body
        parfor.init_block = cache_parfor_init_block
        parfor.loop_nests = cache_parfor_loop_nests
        parfor.races = cache_parfor_races


def dppl_lower_array_expr(lowerer, expr):
    # Array expressions are not supported by the DPPL lowerer.
    raise NotImplementedError(expr)
numba.core import datamodel from numba.core.base import BaseContext +from numba.core.registry import cpu_target from numba.core.callconv import MinimalCallConv from . import codegen @@ -105,6 +106,12 @@ def init(self): from numba.np.ufunc_db import _ufunc_db as ufunc_db self.ufunc_db = copy.deepcopy(ufunc_db) + from numba.core.cpu import CPUContext + from numba.core.typing import Context as TypingContext + + self.cpu_context = cpu_target.target_context + + def replace_numpy_ufunc_with_opencl_supported_functions(self): from numba.dppl.ocl.mathimpl import lower_ocl_impl, sig_mapper diff --git a/numba/dppl/tests/dppl/test_dppl_fallback.py b/numba/dppl/tests/dppl/test_dppl_fallback.py new file mode 100644 index 00000000000..38842e90274 --- /dev/null +++ b/numba/dppl/tests/dppl/test_dppl_fallback.py @@ -0,0 +1,56 @@ +from __future__ import print_function, division, absolute_import + +import numpy as np + +import numba +from numba import dppl +from numba.dppl.testing import unittest +from numba.dppl.testing import DPPLTestCase +import dppl.ocldrv as ocldrv +import sys +import io + + +@unittest.skipUnless(ocldrv.has_gpu_device, 'test only on GPU system') +class TestDPPLFallback(DPPLTestCase): + + def capture_stderr(self, func): + backup = sys.stderr + sys.stderr = io.StringIO() + result = func() + out = sys.stderr.getvalue() + sys.stderr.close() + sys.stderr = backup + + return out, result + + def test_dppl_fallback(self): + + @numba.jit + def fill_value(i): + return i + + def np_rand_fallback(): + x = 10 + a = np.empty(shape=x, dtype=np.float32) + + for i in numba.prange(x): + a[i] = fill_value(i) + + return a + + def run_dppl(): + dppl = numba.njit(parallel={'offload':True})(np_rand_fallback) + return dppl() + + ref = np_rand_fallback + + err, dppl_result = self.capture_stderr(run_dppl) + ref_result = ref() + + np.testing.assert_array_equal(dppl_result, ref_result) + self.assertTrue('Failed to lower parfor on GPU' in err) + + +if __name__ == '__main__': + unittest.main()