10 changes: 9 additions & 1 deletion README.rst
@@ -1,5 +1,5 @@
*****
Numba
Numba with patches for numba-dppy
*****

.. image:: https://badges.gitter.im/numba/numba.svg
@@ -10,6 +10,14 @@ Numba
:target: https://numba.discourse.group/
:alt: Discourse

Patches for numba-dppy
######################

See https://github.com/IntelPython/numba-dppy.
If the `numba-dppy` package is installed, this version of Numba provides
additional features.
Without the `numba-dppy` package, this version of Numba works like the original Numba.
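
As a hedged illustration (not part of the patch): ordinary Numba usage is expected to be unchanged, and the example below should behave identically whether or not `numba-dppy` is installed.

    # Minimal sketch, assuming a standard Numba install; numba-dppy is optional.
    import numpy as np
    from numba import njit

    @njit
    def total(a):
        s = 0.0
        for x in a:
            s += x
        return s

    print(total(np.arange(10.0)))  # same result with or without numba-dppy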

A Just-In-Time Compiler for Numerical Functions in Python
#########################################################

13 changes: 12 additions & 1 deletion numba/_typeof.c
@@ -835,6 +835,7 @@ int
typeof_typecode(PyObject *dispatcher, PyObject *val)
{
PyTypeObject *tyobj = Py_TYPE(val);
int no_subtype_attr;
/* This needs to be kept in sync with Dispatcher.typeof_pyval(),
* otherwise funny things may happen.
*/
@@ -861,9 +862,19 @@ typeof_typecode(PyObject *dispatcher, PyObject *val)
return typecode_arrayscalar(dispatcher, val);
}
/* Array handling */
else if (PyType_IsSubtype(tyobj, &PyArray_Type)) {
else if (tyobj == &PyArray_Type) {
return typecode_ndarray(dispatcher, (PyArrayObject*)val);
}
/* Subtypes of Array handling */
else if (PyType_IsSubtype(tyobj, &PyArray_Type)) {
/* If the class has an attribute named __numba_no_subtype_ndarray__ then
   don't treat it as a normal variant of a NumPy ndarray but as its own
   separate type. */
no_subtype_attr = PyObject_HasAttrString(val, "__numba_no_subtype_ndarray__");
if (!no_subtype_attr) {
return typecode_ndarray(dispatcher, (PyArrayObject*)val);
}
}

return typecode_using_fingerprint(dispatcher, val);
}
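
A hedged Python-side illustration of the opt-out hook above (the class name is hypothetical): an ndarray subclass that defines `__numba_no_subtype_ndarray__` is not typed as a plain ndarray and instead falls through to `typecode_using_fingerprint`.

    # Hypothetical sketch: an ndarray subclass opting out of ndarray typing.
    import numpy as np

    class OpaqueArray(np.ndarray):
        # Presence of this attribute tells the patched typeof_typecode() to
        # treat instances as their own type, not as an ndarray variant.
        __numba_no_subtype_ndarray__ = True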
6 changes: 3 additions & 3 deletions numba/core/codegen.py
@@ -663,7 +663,7 @@ def finalize(self):
Finalization involves various stages of code optimization and
linking.
"""
require_global_compiler_lock()
#require_global_compiler_lock()

# Report any LLVM-related problems to the user
self._codegen._check_llvm_bugs()
@@ -690,7 +690,7 @@ def finalize(self):
self._final_module.verify()
self._finalize_final_module()

def _finalize_dyanmic_globals(self):
def _finalize_dynamic_globals(self):
# Scan for dynamic globals
for gv in self._final_module.global_variables:
if gv.name.startswith('numba.dynamic.globals'):
@@ -708,7 +708,7 @@ def _finalize_final_module(self):
"""
Make the underlying LLVM module ready to use.
"""
self._finalize_dyanmic_globals()
self._finalize_dynamic_globals()
self._verify_declare_only_symbols()

# Remember this on the module, for the object cache hooks
2 changes: 1 addition & 1 deletion numba/core/datamodel/models.py
@@ -873,7 +873,7 @@ def __init__(self, dmm, fe_type):
('parent', types.pyobject),
('nitems', types.intp),
('itemsize', types.intp),
('data', types.CPointer(fe_type.dtype)),
('data', types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace)),
('shape', types.UniTuple(types.intp, ndim)),
('strides', types.UniTuple(types.intp, ndim)),

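A hedged sketch of the data-model change above: with this patch, `types.CPointer` accepts an `addrspace` keyword (as used for the `data` member), so an array type can carry its address space through to its data pointer. The address-space value below is illustrative only.

    # Sketch under the assumption that the patched types.CPointer takes an
    # addrspace keyword, mirroring its use in the array data model above.
    from numba.core import types

    ptr_ty = types.CPointer(types.float32, addrspace=1)  # illustrative value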
58 changes: 41 additions & 17 deletions numba/core/decorators.py
@@ -152,7 +152,7 @@ def bar(x, y):
target = options.pop('target')
warnings.warn("The 'target' keyword argument is deprecated.", NumbaDeprecationWarning)
else:
target = options.pop('_target', 'cpu')
target = options.pop('_target', None)

options['boundscheck'] = boundscheck

@@ -186,27 +186,16 @@ def bar(x, y):


def _jit(sigs, locals, target, cache, targetoptions, **dispatcher_args):
dispatcher = registry.dispatcher_registry[target]

def wrapper(func):
if extending.is_jitted(func):
raise TypeError(
"A jit decorator was called on an already jitted function "
f"{func}. If trying to access the original python "
f"function, use the {func}.py_func attribute."
)

if not inspect.isfunction(func):
raise TypeError(
"The decorated object is not a function (got type "
f"{type(func)})."
)

def wrapper(func, dispatcher):
if config.ENABLE_CUDASIM and target == 'cuda':
from numba import cuda
return cuda.jit(func)
if config.DISABLE_JIT and not target == 'npyufunc':
return func
if target == 'dppl':
from . import dppl
return dppl.jit(func)
disp = dispatcher(py_func=func, locals=locals,
targetoptions=targetoptions,
**dispatcher_args)
@@ -222,7 +211,42 @@ def wrapper(func):
disp.disable_compile()
return disp

return wrapper
def __wrapper(func):
if extending.is_jitted(func):
raise TypeError(
"A jit decorator was called on an already jitted function "
f"{func}. If trying to access the original python "
f"function, use the {func}.py_func attribute."
)

if not inspect.isfunction(func):
raise TypeError(
"The decorated object is not a function (got type "
f"{type(func)})."
)

is_numba_dppy_present = False
try:
import numba_dppy.config as dppy_config

is_numba_dppy_present = dppy_config.dppy_present
except ImportError:
pass

if (not is_numba_dppy_present
or target == 'npyufunc' or targetoptions.get('no_cpython_wrapper')
or sigs or config.DISABLE_JIT or not targetoptions.get('nopython')):
target_ = target
if target_ is None:
target_ = 'cpu'
disp = registry.dispatcher_registry[target_]
return wrapper(func, disp)

from numba_dppy.target_dispatcher import TargetDispatcher
disp = TargetDispatcher(func, wrapper, target, targetoptions.get('parallel'))
return disp

return __wrapper
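
A hedged usage sketch of the dispatch decision in `__wrapper` above: when numba-dppy is absent (or the options rule it out, e.g. explicit signatures, `DISABLE_JIT`, or a non-nopython target), the usual CPU dispatcher is returned; otherwise the function is wrapped in numba-dppy's `TargetDispatcher`.

    # Sketch assuming numba-dppy may or may not be installed.
    from numba import njit

    @njit(parallel=True)
    def double(x):
        return x * 2

    # Without numba-dppy this is a regular CPU Dispatcher; with numba-dppy
    # present it is expected to be a numba_dppy.target_dispatcher.TargetDispatcher
    # wrapping the same Python function.
    print(type(double))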


def generated_jit(function=None, target='cpu', cache=False,
12 changes: 11 additions & 1 deletion numba/core/dispatcher.py
@@ -711,7 +711,14 @@ def _set_uuid(self, u):
self._recent.append(self)


class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
import abc

class DispatcherMeta(abc.ABCMeta):
def __instancecheck__(self, other):
return type(type(other)) == DispatcherMeta


class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase, metaclass=DispatcherMeta):
"""
Implementation of user-facing dispatcher objects (i.e. created using
the @jit decorator).
@@ -937,6 +944,9 @@ def get_function_type(self):
cres = tuple(self.overloads.values())[0]
return types.FunctionType(cres.signature)

def get_compiled(self):
return self
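
A short hedged sketch of what the metaclass-based `__instancecheck__` above changes: `isinstance(obj, Dispatcher)` now holds for any object whose class uses `DispatcherMeta` as its metaclass (for example numba-dppy's `TargetDispatcher`), even without inheriting from `Dispatcher`. The `ForeignDispatcher` name is hypothetical.

    # Hypothetical sketch of the metaclass-based isinstance behaviour.
    from numba.core.dispatcher import Dispatcher, DispatcherMeta

    class ForeignDispatcher(metaclass=DispatcherMeta):
        """Not a Dispatcher subclass, but shares the DispatcherMeta metaclass."""

    obj = ForeignDispatcher()
    print(isinstance(obj, Dispatcher))  # True: the check is metaclass-based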


class LiftedCode(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
"""
2 changes: 1 addition & 1 deletion numba/core/extending.py
@@ -15,7 +15,7 @@
lower_setattr, lower_setattr_generic, lower_cast) # noqa: F401
from numba.core.datamodel import models # noqa: F401
from numba.core.datamodel import register_default as register_model # noqa: F401, E501
from numba.core.pythonapi import box, unbox, reflect, NativeValue # noqa: F401
from numba.core.pythonapi import box, unbox, reflect, NativeValue, allocator # noqa: F401
from numba._helperlib import _import_cython_function # noqa: F401
from numba.core.serialize import ReduceMixin

12 changes: 8 additions & 4 deletions numba/core/ir_utils.py
@@ -64,6 +64,8 @@ def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc):
out = []
ndims = 1
size_typ = types.intp
# Get the type of the array being allocated.
arr_typ = typemap[lhs.name]
if isinstance(size_var, tuple):
if len(size_var) == 1:
size_var = size_var[0]
@@ -108,11 +110,13 @@ def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc):
typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc)
alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc)
if calltypes:
calltypes[alloc_call] = typemap[attr_var.name].get_call_type(
cac = typemap[attr_var.name].get_call_type(
typing.Context(), [size_typ, types.functions.NumberClass(dtype)], {})
# signature(
# types.npytypes.Array(dtype, ndims, 'C'), size_typ,
# types.functions.NumberClass(dtype))
# By default, all calls to "empty" are typed as returning a standard
# NumPy ndarray. If we are allocating an ndarray subclass here, then
# change the return type to be that of the subclass.
cac._return_type = arr_typ
calltypes[alloc_call] = cac
alloc_assign = ir.Assign(alloc_call, lhs, loc)

out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign])
9 changes: 8 additions & 1 deletion numba/core/lowering.py
@@ -274,6 +274,13 @@ def debug_print(self, msg):
class Lower(BaseLower):
GeneratorLower = generators.GeneratorLower

def __init__(self, context, library, fndesc, func_ir, metadata=None):
BaseLower.__init__(self, context, library, fndesc, func_ir, metadata)
from numba.parfors.parfor_lowering import _lower_parfor_parallel
from numba.parfors import parfor
if parfor.Parfor not in lower_extensions:
lower_extensions[parfor.Parfor] = [_lower_parfor_parallel]

def pre_block(self, block):
from numba.core.unsafe import eh

@@ -440,7 +447,7 @@ def lower_inst(self, inst):
else:
for _class, func in lower_extensions.items():
if isinstance(inst, _class):
func(self, inst)
func[-1](self, inst)
return
raise NotImplementedError(type(inst))
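
The change above implies that `lower_extensions` now maps an IR node class to a list of lowering callbacks, with the most recently registered one (`func[-1]`) taking effect; the constructor change registers `_lower_parfor_parallel` for `parfor.Parfor` in exactly this way. A hedged sketch of registering an alternative lowering (names are hypothetical):

    # Hypothetical sketch, assuming lower_extensions lives in numba.core.lowering
    # and maps IR node classes to lists of callbacks, as used above.
    from numba.core.lowering import lower_extensions
    from numba.parfors import parfor

    def my_lower_parfor(lowerer, inst):
        # Assumed callback signature (lowerer, ir_node), matching the
        # func[-1](self, inst) call in lower_inst().
        raise NotImplementedError("illustrative only")

    # Appending makes this the active lowering, since lower_inst calls func[-1].
    lower_extensions.setdefault(parfor.Parfor, []).append(my_lower_parfor)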

9 changes: 8 additions & 1 deletion numba/core/pythonapi.py
@@ -46,10 +46,13 @@ def lookup(self, typeclass, default=None):
_boxers = _Registry()
_unboxers = _Registry()
_reflectors = _Registry()
# Registry of special allocators for types.
_allocators = _Registry()

box = _boxers.register
unbox = _unboxers.register
reflect = _reflectors.register
allocator = _allocators.register
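
A hedged sketch of how the new `allocator` registry is presumably used, following the same decorator pattern as `box`/`unbox`; the type name and the callback signature below are assumptions, not part of the patch.

    # Hypothetical sketch: registering a special allocator for a custom type.
    from numba.core import types
    from numba.core.pythonapi import allocator

    class MyArrayType(types.Array):
        """Hypothetical array type that needs a non-default allocator."""

    @allocator(MyArrayType)
    def allocate_my_array(context, builder, size, align):
        # Assumed signature; return memory produced by a custom allocation
        # routine instead of the default NRT allocator.
        raise NotImplementedError("illustrative only")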

class _BoxContext(namedtuple("_BoxContext",
("context", "builder", "pyapi", "env_manager"))):
@@ -1187,8 +1190,11 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr):
assert self.context.enable_nrt, "NRT required"

intty = ir.IntType(32)
# Embed the Python type of the array (which may be a subclass) in the LLVM IR.
serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.py_type))

fnty = Type.function(self.pyobj,
[self.voidptr, intty, intty, self.pyobj])
[self.voidptr, self.pyobj, intty, intty, self.pyobj])
fn = self._get_function(fnty, name="NRT_adapt_ndarray_to_python_acqref")
fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE)

@@ -1198,6 +1204,7 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr):
aryptr = cgutils.alloca_once_value(self.builder, ary)
return self.builder.call(fn, [self.builder.bitcast(aryptr,
self.voidptr),
serial_aryty_pytype,
ndim, writable, dtypeptr])

def nrt_meminfo_new_from_pyobject(self, data, pyobj):
6 changes: 6 additions & 0 deletions numba/core/registry.py
@@ -2,6 +2,7 @@

from numba.core.descriptors import TargetDescriptor
from numba.core import utils, typing, dispatcher, cpu
from numba.core.compiler_lock import global_compiler_lock

# -----------------------------------------------------------------------------
# Default CPU target descriptors
@@ -26,16 +27,19 @@ class CPUTarget(TargetDescriptor):
_nested = _NestedContext()

@utils.cached_property
@global_compiler_lock
def _toplevel_target_context(self):
# Lazily-initialized top-level target context, for all threads
return cpu.CPUContext(self.typing_context)

@utils.cached_property
@global_compiler_lock
def _toplevel_typing_context(self):
# Lazily-initialized top-level typing context, for all threads
return typing.Context()

@property
@global_compiler_lock
def target_context(self):
"""
The target context for CPU targets.
@@ -47,6 +51,7 @@ def target_context(self):
return self._toplevel_target_context

@property
@global_compiler_lock
def typing_context(self):
"""
The typing context for CPU targets.
@@ -57,6 +62,7 @@ def typing_context(self):
else:
return self._toplevel_typing_context

@global_compiler_lock
def nested_context(self, typing_context, target_context):
"""
A context manager temporarily replacing the contexts with the