diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index fd4b06e445..0000000000
--- a/.coveragerc
+++ /dev/null
@@ -1,21 +0,0 @@
-[run]
-branch = True
-source = numba_dppy
-omit = numba_dppy/tests/*
-concurrency = multiprocessing
-parallel = True
-
-[report]
-precision = 2
-; show_missing = True
-; sort = -cover
-omit =
-    # Vendored packages
-    numba_dppy/_version.py
-exclude_lines =
-    pragma: no cover
-    raise NotImplementedError
-    if config.DEBUG:
-    # numba compiled code
-    @register_jitable
-    def dpnp_impl
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index f988b3a7a8..21dc114bad 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -2,3 +2,6 @@
 # Migrate code style to Black
 8bd62e61bb70fe0483bd494040e4103fd050252a
+
+# Set max line width to 80 in black
+c27b2b31b3f275225f8db5d7bd5924b4677bcce2
diff --git a/numba_dppy/compiler.py b/numba_dppy/compiler.py
index ffe24666f0..2efc12693b 100644
--- a/numba_dppy/compiler.py
+++ b/numba_dppy/compiler.py
@@ -84,12 +84,16 @@ def define_pipelines(self):
         # this maintains the objmode fallback behaviour
         pms = []
         self.state.parfor_diagnostics = ExtendedParforDiagnostics()
-        self.state.metadata["parfor_diagnostics"] = self.state.parfor_diagnostics
+        self.state.metadata[
+            "parfor_diagnostics"
+        ] = self.state.parfor_diagnostics
         if not self.state.flags.force_pyobject:
             # print("Numba-DPPY [INFO]: Using Numba-DPPY pipeline")
             pms.append(DPPYPassBuilder.define_nopython_pipeline(self.state))
         if self.state.status.can_fallback or self.state.flags.force_pyobject:
-            pms.append(DefaultPassBuilder.define_objectmode_pipeline(self.state))
+            pms.append(
+                DefaultPassBuilder.define_objectmode_pipeline(self.state)
+            )
         return pms


@@ -169,7 +173,9 @@ def compile_with_dppy(pyfunc, return_type, args, is_kernel, debug=None):
 def compile_kernel(sycl_queue, pyfunc, args, access_types, debug=None):
     # For any array we only accept numba_dppy.dppy_array_type.DPPYArray
     for arg in args:
-        if isinstance(arg, types.npytypes.Array) and not isinstance(arg, DPPYArray):
+        if isinstance(arg, types.npytypes.Array) and not isinstance(
+            arg, DPPYArray
+        ):
             raise TypeError(
                 "We only accept DPPYArray as type of array-like objects. We received %s"
                 % (type(arg))
@@ -203,10 +209,14 @@ def compile_kernel(sycl_queue, pyfunc, args, access_types, debug=None):
     return oclkern


-def compile_kernel_parfor(sycl_queue, func_ir, args, args_with_addrspaces, debug=None):
+def compile_kernel_parfor(
+    sycl_queue, func_ir, args, args_with_addrspaces, debug=None
+):
     # For any array we only accept numba_dppy.dppy_array_type.DPPYArray
     for arg in args_with_addrspaces:
-        if isinstance(arg, types.npytypes.Array) and not isinstance(arg, DPPYArray):
+        if isinstance(arg, types.npytypes.Array) and not isinstance(
+            arg, DPPYArray
+        ):
             raise TypeError(
                 "We only accept DPPYArray as type of array-like objects. 
We received %s" % (type(arg)) @@ -246,7 +256,11 @@ def compile_kernel_parfor(sycl_queue, func_ir, args, args_with_addrspaces, debug def compile_dppy_func(pyfunc, return_type, args, debug=None): cres = compile_with_dppy( - pyfunc=pyfunc, return_type=return_type, args=args, is_kernel=False, debug=debug + pyfunc=pyfunc, + return_type=return_type, + args=args, + is_kernel=False, + debug=debug, ) func = cres.library.get_function(cres.fndesc.llvm_func_name) cres.target_context.mark_ocl_device(func) @@ -312,7 +326,9 @@ def compile(self, args): if first_definition: # First definition - cres.target_context.insert_user_function(self, cres.fndesc, libs) + cres.target_context.insert_user_function( + self, cres.fndesc, libs + ) else: cres.target_context.add_user_function(self, cres.fndesc, libs) @@ -330,7 +346,9 @@ def __init__(self, cres): def _ensure_valid_work_item_grid(val, sycl_queue): if not isinstance(val, (tuple, list, int)): - error_message = "Cannot create work item dimension from provided argument" + error_message = ( + "Cannot create work item dimension from provided argument" + ) raise ValueError(error_message) if isinstance(val, int): @@ -351,7 +369,9 @@ def _ensure_valid_work_item_grid(val, sycl_queue): def _ensure_valid_work_group_size(val, work_item_grid): if not isinstance(val, (tuple, list, int)): - error_message = "Cannot create work item dimension from provided argument" + error_message = ( + "Cannot create work item dimension from provided argument" + ) raise ValueError(error_message) if isinstance(val, int): @@ -496,7 +516,9 @@ def __call__(self, *args): internal_device_arrs, self.ordered_arg_access_types, ): - self._pack_argument(ty, val, self.sycl_queue, i_dev_arr, access_type) + self._pack_argument( + ty, val, self.sycl_queue, i_dev_arr, access_type + ) def _pack_argument(self, ty, val, sycl_queue, device_arr, access_type): """ @@ -584,7 +606,9 @@ def _unpack_argument( packed_val = val usm_mem = has_usm_memory(val) if usm_mem is None: - default_behavior = self.check_for_invalid_access_type(access_type) + default_behavior = self.check_for_invalid_access_type( + access_type + ) usm_mem = as_usm_obj(val, queue=sycl_queue, copy=False) orig_val = val @@ -603,8 +627,10 @@ def _unpack_argument( if ( default_behavior - or self.valid_access_types[access_type] == _NUMBA_DPPY_READ_ONLY - or self.valid_access_types[access_type] == _NUMBA_DPPY_READ_WRITE + or self.valid_access_types[access_type] + == _NUMBA_DPPY_READ_ONLY + or self.valid_access_types[access_type] + == _NUMBA_DPPY_READ_WRITE ): copy_from_numpy_to_usm_obj(usm_mem, packed_val) @@ -693,7 +719,9 @@ def __call__(self, *args, **kwargs): argtypes = self._get_argtypes(*args) kernel = self.specialize(argtypes, current_queue) - cfg = kernel.configure(self.sycl_queue, self.global_size, self.local_size) + cfg = kernel.configure( + self.sycl_queue, self.global_size, self.local_size + ) cfg(*args) def specialize(self, argtypes, queue): diff --git a/numba_dppy/config.py b/numba_dppy/config.py index 10cff9dbe6..c59ef68789 100644 --- a/numba_dppy/config.py +++ b/numba_dppy/config.py @@ -105,4 +105,6 @@ def __getattr__(name): LLVM_SPIRV_ROOT = _readenv("NUMBA_DPPY_LLVM_SPIRV_ROOT", str, "") # Emit debug info DEBUG = _readenv("NUMBA_DPPY_DEBUG", int, config.DEBUG) -DEBUGINFO_DEFAULT = _readenv("NUMBA_DPPY_DEBUGINFO", int, config.DEBUGINFO_DEFAULT) +DEBUGINFO_DEFAULT = _readenv( + "NUMBA_DPPY_DEBUGINFO", int, config.DEBUGINFO_DEFAULT +) diff --git a/numba_dppy/decorators.py b/numba_dppy/decorators.py index c65398451f..c0f2f854b9 100644 --- 
a/numba_dppy/decorators.py +++ b/numba_dppy/decorators.py @@ -42,7 +42,9 @@ def kernel(signature=None, access_types=None, debug=None): def autojit(debug=None, access_types=None): def _kernel_autojit(pyfunc): - ordered_arg_access_types = get_ordered_arg_access_types(pyfunc, access_types) + ordered_arg_access_types = get_ordered_arg_access_types( + pyfunc, access_types + ) return JitDPPYKernel(pyfunc, debug, ordered_arg_access_types) return _kernel_autojit @@ -64,7 +66,9 @@ def _kernel_jit(signature, debug, access_types): def _wrapped(pyfunc): current_queue = dpctl.get_current_queue() - ordered_arg_access_types = get_ordered_arg_access_types(pyfunc, access_types) + ordered_arg_access_types = get_ordered_arg_access_types( + pyfunc, access_types + ) # We create an instance of JitDPPYKernel to make sure at call time # we are going through the caching mechanism. dppy_kernel = JitDPPYKernel(pyfunc, debug, ordered_arg_access_types) diff --git a/numba_dppy/dpctl_functions.py b/numba_dppy/dpctl_functions.py index 5226a6bd73..54bd6b0718 100644 --- a/numba_dppy/dpctl_functions.py +++ b/numba_dppy/dpctl_functions.py @@ -30,7 +30,9 @@ def dpctl_malloc_shared(): def dpctl_queue_memcpy(): ret_type = types.voidptr - sig = signature(ret_type, types.voidptr, types.voidptr, types.voidptr, types.int64) + sig = signature( + ret_type, types.voidptr, types.voidptr, types.voidptr, types.int64 + ) return types.ExternalFunction("DPCTLQueue_Memcpy", sig) diff --git a/numba_dppy/dpnp_glue/dpnp_array_creations_impl.py b/numba_dppy/dpnp_glue/dpnp_array_creations_impl.py index 67c3263371..dbc578b5ad 100644 --- a/numba_dppy/dpnp_glue/dpnp_array_creations_impl.py +++ b/numba_dppy/dpnp_glue/dpnp_array_creations_impl.py @@ -224,7 +224,9 @@ def dpnp_impl(a, b): dpctl_functions.event_delete(event) out = np.arange(0, a.size, 1, res_dtype) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(b_usm, out_usm, a.size) @@ -260,7 +262,9 @@ def dpnp_trace_impl(a): void dpnp_trace_c(const void* array1_in, void* result1, const size_t* shape_, const size_t ndim) """ - sig = signature(ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp) + sig = signature( + ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig) PRINT_DEBUG = dpnp_lowering.DEBUG diff --git a/numba_dppy/dpnp_glue/dpnp_array_ops_impl.py b/numba_dppy/dpnp_glue/dpnp_array_ops_impl.py index 5c08aaf031..4507094f99 100644 --- a/numba_dppy/dpnp_glue/dpnp_array_ops_impl.py +++ b/numba_dppy/dpnp_glue/dpnp_array_ops_impl.py @@ -145,7 +145,9 @@ def dpnp_impl(a): dpctl_functions.event_delete(event) out = np.arange(0, a.size, 1, res_dtype) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, a.size) @@ -201,7 +203,9 @@ def dpnp_impl(a): dpctl_functions.event_delete(event) out = np.arange(0, a.size, 1, res_dtype) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, a.size) @@ -261,7 +265,9 @@ def dpnp_impl(a, ind): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - ind_usm = dpctl_functions.malloc_shared(ind.size * ind.itemsize, sycl_queue) + ind_usm = 
dpctl_functions.malloc_shared( + ind.size * ind.itemsize, sycl_queue + ) event = dpctl_functions.queue_memcpy( sycl_queue, ind_usm, ind.ctypes, ind.size * ind.itemsize ) @@ -269,7 +275,9 @@ def dpnp_impl(a, ind): dpctl_functions.event_delete(event) out = np.arange(0, ind.size, 1, res_dtype).reshape(ind.shape) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, a.size * a.itemsize, ind_usm, out_usm, ind.size) diff --git a/numba_dppy/dpnp_glue/dpnp_indexing.py b/numba_dppy/dpnp_glue/dpnp_indexing.py index 3c5db63d12..60b4fbe5a4 100644 --- a/numba_dppy/dpnp_glue/dpnp_indexing.py +++ b/numba_dppy/dpnp_glue/dpnp_indexing.py @@ -92,7 +92,9 @@ def dpnp_impl(a, offset=0): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func( a_usm, diff --git a/numba_dppy/dpnp_glue/dpnp_linalgimpl.py b/numba_dppy/dpnp_glue/dpnp_linalgimpl.py index 4da429b379..2bc383565b 100644 --- a/numba_dppy/dpnp_glue/dpnp_linalgimpl.py +++ b/numba_dppy/dpnp_glue/dpnp_linalgimpl.py @@ -39,7 +39,9 @@ def dpnp_eig_impl(a): void dpnp_eig_c(const void* array_in, void* result1, void* result2, size_t size) """ - sig = signature(ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp) + sig = signature( + ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp + ) dpnp_eig = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig) res_dtype = np.float64 @@ -69,8 +71,12 @@ def dpnp_impl(a): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - wr_usm = dpctl_functions.malloc_shared(wr.size * wr.itemsize, sycl_queue) - vr_usm = dpctl_functions.malloc_shared(vr.size * vr.itemsize, sycl_queue) + wr_usm = dpctl_functions.malloc_shared( + wr.size * wr.itemsize, sycl_queue + ) + vr_usm = dpctl_functions.malloc_shared( + vr.size * vr.itemsize, sycl_queue + ) dpnp_eig(a_usm, wr_usm, vr_usm, n) @@ -103,7 +109,9 @@ def common_matmul_impl(dpnp_func, a, b, out, m, n, k, print_debug): sycl_queue = dpctl_functions.get_current_queue() a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue) - dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize) + dpctl_functions.queue_memcpy( + sycl_queue, a_usm, a.ctypes, a.size * a.itemsize + ) b_usm = dpctl_functions.malloc_shared(b.size * b.itemsize, sycl_queue) event = dpctl_functions.queue_memcpy( @@ -336,7 +344,9 @@ def dpnp_dot_impl(a, b): ndims = [a.ndim, b.ndim] if ndims == [2, 2]: - dpnp_func = dpnp_ext.dpnp_func("dpnp_matmul", [a.dtype.name, "NONE"], sig) + dpnp_func = dpnp_ext.dpnp_func( + "dpnp_matmul", [a.dtype.name, "NONE"], sig + ) def dot_2_mm(a, b): m, k = a.shape @@ -352,7 +362,9 @@ def dot_2_mm(a, b): return dot_2_mm elif ndims == [2, 1]: - dpnp_func = dpnp_ext.dpnp_func("dpnp_matmul", [a.dtype.name, "NONE"], sig) + dpnp_func = dpnp_ext.dpnp_func( + "dpnp_matmul", [a.dtype.name, "NONE"], sig + ) def dot_2_mv(a, b): m, k = a.shape @@ -369,7 +381,9 @@ def dot_2_mv(a, b): return dot_2_mv elif ndims == [1, 2]: - dpnp_func = dpnp_ext.dpnp_func("dpnp_matmul", [a.dtype.name, "NONE"], sig) + dpnp_func = dpnp_ext.dpnp_func( + "dpnp_matmul", [a.dtype.name, "NONE"], sig + ) def dot_2_vm(a, b): (m,) = a.shape @@ -508,7 +522,9 @@ def dpnp_matrix_power_impl(a, n): def dpnp_impl(a, n): if n < 0: - raise 
ValueError("n < 0 is not supported for np.linalg.matrix_power(a, n)") + raise ValueError( + "n < 0 is not supported for np.linalg.matrix_power(a, n)" + ) if n == 0: if PRINT_DEBUG: @@ -559,7 +575,9 @@ def dpnp_impl(a): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, a.shapeptr) @@ -594,7 +612,9 @@ def dpnp_det_impl(a): Function declaration: void custom_det_c(void* array1_in, void* result1, size_t* shape, size_t ndim) """ - sig = signature(ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp) + sig = signature( + ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig) PRINT_DEBUG = dpnp_lowering.DEBUG @@ -619,7 +639,9 @@ def dpnp_impl(a): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, a.shapeptr, a.ndim) @@ -657,15 +679,21 @@ def dpnp_matrix_rank_impl(M, tol=None, hermitian=False): Function declaration: void custom_matrix_rank_c(void* array1_in, void* result1, size_t* shape, size_t ndim) """ - sig = signature(ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp) + sig = signature( + ret_type, types.voidptr, types.voidptr, types.voidptr, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [M.dtype.name, "NONE"], sig) PRINT_DEBUG = dpnp_lowering.DEBUG def dpnp_impl(M, tol=None, hermitian=False): if tol is not None: - raise ValueError("tol is not supported for np.linalg.matrix_rank(M)") + raise ValueError( + "tol is not supported for np.linalg.matrix_rank(M)" + ) if hermitian: - raise ValueError("hermitian is not supported for np.linalg.matrix_rank(M)") + raise ValueError( + "hermitian is not supported for np.linalg.matrix_rank(M)" + ) if M.ndim > 2: raise ValueError( @@ -682,7 +710,9 @@ def dpnp_impl(M, tol=None, hermitian=False): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(M_usm, out_usm, M.shapeptr, M.ndim) diff --git a/numba_dppy/dpnp_glue/dpnp_logic.py b/numba_dppy/dpnp_glue/dpnp_logic.py index 815b985696..789753c024 100644 --- a/numba_dppy/dpnp_glue/dpnp_logic.py +++ b/numba_dppy/dpnp_glue/dpnp_logic.py @@ -56,7 +56,9 @@ def dpnp_impl(a): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, a.size) diff --git a/numba_dppy/dpnp_glue/dpnp_manipulation.py b/numba_dppy/dpnp_glue/dpnp_manipulation.py index 9687730f8c..dc8d50378d 100644 --- a/numba_dppy/dpnp_glue/dpnp_manipulation.py +++ b/numba_dppy/dpnp_glue/dpnp_manipulation.py @@ -67,7 +67,9 @@ def dpnp_impl(a, repeats): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, repeats, a.size) diff 
--git a/numba_dppy/dpnp_glue/dpnp_randomimpl.py b/numba_dppy/dpnp_glue/dpnp_randomimpl.py index d85fb85deb..495eb1c110 100644 --- a/numba_dppy/dpnp_glue/dpnp_randomimpl.py +++ b/numba_dppy/dpnp_glue/dpnp_randomimpl.py @@ -116,7 +116,9 @@ def common_impl_2_arg(arg1, arg2, res, dpnp_func, print_debug): @register_jitable -def common_impl_hypergeometric(ngood, nbad, nsample, res, dpnp_func, print_debug): +def common_impl_hypergeometric( + ngood, nbad, nsample, res, dpnp_func, print_debug +): sycl_queue = dpctl_functions.get_current_queue() res_usm = dpctl_functions.malloc_shared(res.size * res.itemsize, sycl_queue) @@ -141,7 +143,9 @@ def common_impl_multinomial(n, pvals, res, dpnp_func, print_debug): sycl_queue = dpctl_functions.get_current_queue() res_usm = dpctl_functions.malloc_shared(res.size * res.itemsize, sycl_queue) - pvals_usm = dpctl_functions.malloc_shared(pvals.size * pvals.itemsize, sycl_queue) + pvals_usm = dpctl_functions.malloc_shared( + pvals.size * pvals.itemsize, sycl_queue + ) event = dpctl_functions.queue_memcpy( sycl_queue, pvals_usm, pvals.ctypes, pvals.size * pvals.itemsize ) @@ -172,7 +176,9 @@ def common_impl_multivariate_normal( sycl_queue = dpctl_functions.get_current_queue() res_usm = dpctl_functions.malloc_shared(res.size * res.itemsize, sycl_queue) - mean_usm = dpctl_functions.malloc_shared(mean.size * mean.itemsize, sycl_queue) + mean_usm = dpctl_functions.malloc_shared( + mean.size * mean.itemsize, sycl_queue + ) event = dpctl_functions.queue_memcpy( sycl_queue, mean_usm, mean.ctypes, mean.size * mean.itemsize ) @@ -186,7 +192,9 @@ def common_impl_multivariate_normal( dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - dpnp_func(res_usm, mean.size, mean_usm, mean.size, cov_usm, cov.size, res.size) + dpnp_func( + res_usm, mean.size, mean_usm, mean.size, cov_usm, cov.size, res.size + ) event = dpctl_functions.queue_memcpy( sycl_queue, res.ctypes, res_usm, res.size * res.itemsize @@ -221,7 +229,9 @@ def dpnp_random_impl(size): void custom_rng_uniform_c(void* result, long low, long high, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int64, types.int64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int64, types.int64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 @@ -251,7 +261,9 @@ def dpnp_random_impl(*size): void custom_rng_uniform_c(void* result, long low, long high, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int64, types.int64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int64, types.int64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 @@ -281,7 +293,9 @@ def dpnp_random_impl(low, high=None, size=None): void custom_rng_uniform_c(void* result, long low, long high, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int64, types.int64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int64, types.int64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["int32", "NONE"], sig) res_dtype = np.int32 @@ -337,7 +351,9 @@ def dpnp_random_impl(low, high=None, size=None): void custom_rng_uniform_c(void* result, long low, long high, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int64, types.int64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int64, types.int64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["int32", "NONE"], sig) res_dtype = np.int32 
@@ -393,7 +409,9 @@ def dpnp_random_impl(a, b, size=None): void custom_rng_beta_c(void* result, _DataType a, _DataType b, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -436,7 +454,9 @@ def dpnp_random_impl(n, p, size=None): void custom_rng_binomial_c(void* result, int ntrial, double p, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int32, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int32, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["int32", "NONE"], sig) res_dtype = np.int32 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -557,7 +577,9 @@ def dpnp_random_impl(shape, scale=1.0, size=None): Function declaration: void custom_rng_gamma_c(void* result, _DataType shape, _DataType scale, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -638,7 +660,9 @@ def dpnp_random_impl(loc=0.0, scale=1.0, size=None): Function declaration: void custom_rng_gumbel_c(void* result, double loc, double scale, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -683,7 +707,12 @@ def dpnp_random_impl(ngood, nbad, nsample, size=None): void custom_rng_hypergeometric_c(void* result, int l, int s, int m, size_t size) """ sig = signature( - ret_type, types.voidptr, types.int32, types.int32, types.int32, types.intp + ret_type, + types.voidptr, + types.int32, + types.int32, + types.int32, + types.intp, ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["int32", "NONE"], sig) @@ -735,7 +764,9 @@ def dpnp_random_impl(loc=0.0, scale=1.0, size=None): Function declaration: void custom_rng_laplace_c(void* result, double loc, double scale, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -779,7 +810,9 @@ def dpnp_random_impl(mean=0.0, sigma=1.0, size=None): Function declaration: void custom_rng_lognormal_c(void* result, _DataType mean, _DataType stddev, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -825,7 +858,12 @@ def dpnp_random_impl(n, pvals, size=None): const size_t p_vector_size, size_t size) """ sig = signature( - ret_type, types.voidptr, types.int32, types.voidptr, types.intp, types.intp + ret_type, + types.voidptr, + types.int32, + types.voidptr, + types.intp, + types.intp, ) dpnp_func = 
dpnp_ext.dpnp_func("dpnp_" + name, ["int32", "NONE"], sig) @@ -961,7 +999,9 @@ def dpnp_random_impl(n, p, size=None): Function declaration: void custom_rng_negative_binomial_c(void* result, double a, double p, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int32, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int32, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["int32", "NONE"], sig) res_dtype = np.int32 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -1003,7 +1043,9 @@ def dpnp_random_impl(loc=0.0, scale=1.0, size=None): Function declaration: void custom_rng_normal_c(void* result, _DataType mean, _DataType stddev, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -1238,7 +1280,9 @@ def dpnp_random_impl(size=None): Function declaration: void custom_rng_normal_c(void* result, _DataType mean, _DataType stddev, size_t size) """ - sig = signature(ret_type, types.voidptr, types.float64, types.float64, types.intp) + sig = signature( + ret_type, types.voidptr, types.float64, types.float64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 PRINT_DEBUG = dpnp_lowering.DEBUG @@ -1274,7 +1318,9 @@ def dpnp_random_impl(low=0.0, high=1.0, size=None): Function declaration: void custom_rng_uniform_c(void* result, long low, long high, size_t size) """ - sig = signature(ret_type, types.voidptr, types.int64, types.int64, types.intp) + sig = signature( + ret_type, types.voidptr, types.int64, types.int64, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, ["float64", "NONE"], sig) res_dtype = np.float64 diff --git a/numba_dppy/dpnp_glue/dpnp_sort_search_countimpl.py b/numba_dppy/dpnp_glue/dpnp_sort_search_countimpl.py index a5be02514e..d4bc4aed11 100644 --- a/numba_dppy/dpnp_glue/dpnp_sort_search_countimpl.py +++ b/numba_dppy/dpnp_glue/dpnp_sort_search_countimpl.py @@ -172,7 +172,9 @@ def dpnp_impl(a): dpctl_functions.event_delete(event) out = np.arange(0, a.size, 1, res_dtype) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, a.size) @@ -239,14 +241,18 @@ def dpnp_impl(a, kth): dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - arr2_usm = dpctl_functions.malloc_shared(arr2.size * arr2.itemsize, sycl_queue) + arr2_usm = dpctl_functions.malloc_shared( + arr2.size * arr2.itemsize, sycl_queue + ) event = dpctl_functions.queue_memcpy( sycl_queue, arr2_usm, arr2.ctypes, arr2.size * arr2.itemsize ) dpctl_functions.event_wait(event) dpctl_functions.event_delete(event) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, arr2_usm, out_usm, kth_, a.shapeptr, a.ndim) diff --git a/numba_dppy/dpnp_glue/dpnp_statisticsimpl.py b/numba_dppy/dpnp_glue/dpnp_statisticsimpl.py index e84e2eb8cb..e91f1e6ec5 100644 --- a/numba_dppy/dpnp_glue/dpnp_statisticsimpl.py +++ b/numba_dppy/dpnp_glue/dpnp_statisticsimpl.py @@ -73,7 +73,9 @@ def dpnp_impl(a): axis, naxis = 0, 0 - dpnp_func(a_usm, out_usm, a.size * a.itemsize, a.shapeptr, a.ndim, 
axis, naxis) + dpnp_func( + a_usm, out_usm, a.size * a.itemsize, a.shapeptr, a.ndim, axis, naxis + ) out = np.empty(1, dtype=a.dtype) event = dpctl_functions.queue_memcpy( @@ -142,7 +144,13 @@ def dpnp_impl(a): out_usm = dpctl_functions.malloc_shared(a.itemsize, sycl_queue) dpnp_func( - a_usm, out_usm, a.size * a.itemsize, a.shapeptr, a.ndim, a.shapeptr, 0 + a_usm, + out_usm, + a.size * a.itemsize, + a.shapeptr, + a.ndim, + a.shapeptr, + 0, ) out = np.empty(1, dtype=a.dtype) @@ -320,7 +328,9 @@ def dpnp_cov_impl(a): Function declaration: void custom_cov_c(void* array1_in, void* result1, size_t nrows, size_t ncols) """ - sig = signature(ret_type, types.voidptr, types.voidptr, types.intp, types.intp) + sig = signature( + ret_type, types.voidptr, types.voidptr, types.intp, types.intp + ) dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig) res_dtype = np.float64 @@ -361,7 +371,9 @@ def dpnp_impl(a): cols = a.shape[0] out = np.empty(rows, dtype=res_dtype) - out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue) + out_usm = dpctl_functions.malloc_shared( + out.size * out.itemsize, sycl_queue + ) dpnp_func(a_usm, out_usm, rows, cols) diff --git a/numba_dppy/dpnp_glue/dpnp_transcendentalsimpl.py b/numba_dppy/dpnp_glue/dpnp_transcendentalsimpl.py index 88a81b336f..04ffecbce7 100644 --- a/numba_dppy/dpnp_glue/dpnp_transcendentalsimpl.py +++ b/numba_dppy/dpnp_glue/dpnp_transcendentalsimpl.py @@ -44,7 +44,9 @@ def common_impl(a, out, dpnp_func, print_debug): initial = 0 where = 0 - dpnp_func(out_usm, a_usm, a.shapeptr, a.ndim, axes, axes_ndim, initial, where) + dpnp_func( + out_usm, a_usm, a.shapeptr, a.ndim, axes, axes_ndim, initial, where + ) event = dpctl_functions.queue_memcpy( sycl_queue, out.ctypes, out_usm, out.size * out.itemsize diff --git a/numba_dppy/dpnp_glue/dpnpimpl.py b/numba_dppy/dpnp_glue/dpnpimpl.py index a3d68df1b2..77a54a780c 100644 --- a/numba_dppy/dpnp_glue/dpnpimpl.py +++ b/numba_dppy/dpnp_glue/dpnpimpl.py @@ -62,7 +62,10 @@ def get_pointer(obj): def array_shapeptr(context, builder, typ, value): shape_ptr = builder.gep( value.operands[0], - [context.get_constant(types.int32, 0), context.get_constant(types.int32, 5)], + [ + context.get_constant(types.int32, 0), + context.get_constant(types.int32, 5), + ], ) return builder.bitcast(shape_ptr, ll_void_p) diff --git a/numba_dppy/dppy_array_type.py b/numba_dppy/dppy_array_type.py index 9a86d21b2e..0e0b450fe1 100644 --- a/numba_dppy/dppy_array_type.py +++ b/numba_dppy/dppy_array_type.py @@ -43,7 +43,9 @@ def __init__( aligned=aligned, ) - def copy(self, dtype=None, ndim=None, layout=None, readonly=None, addrspace=None): + def copy( + self, dtype=None, ndim=None, layout=None, readonly=None, addrspace=None + ): if dtype is None: dtype = self.dtype if ndim is None: @@ -86,11 +88,20 @@ class DPPYArrayModel(StructModel): def __init__(self, dmm, fe_type): ndim = fe_type.ndim members = [ - ("meminfo", types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace)), - ("parent", types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace)), + ( + "meminfo", + types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace), + ), + ( + "parent", + types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace), + ), ("nitems", types.intp), ("itemsize", types.intp), - ("data", types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace)), + ( + "data", + types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace), + ), ("shape", types.UniTuple(types.intp, ndim)), ("strides", types.UniTuple(types.intp, ndim)), ] diff 
--git a/numba_dppy/dppy_lowerer.py b/numba_dppy/dppy_lowerer.py index d0b87bb7a2..fa3546ce91 100644 --- a/numba_dppy/dppy_lowerer.py +++ b/numba_dppy/dppy_lowerer.py @@ -49,7 +49,10 @@ from numba.core.typing import signature import warnings -from numba.core.errors import NumbaParallelSafetyWarning, NumbaPerformanceWarning +from numba.core.errors import ( + NumbaParallelSafetyWarning, + NumbaPerformanceWarning, +) from .dufunc_inliner import dufunc_inliner from numba_dppy.driver import KernelLaunchOps @@ -147,7 +150,9 @@ def _dbgprint_after_each_array_assignments(lowerer, loop_body, typemap): new_block.append(assign_lhs) # Make print node - print_node = ir.Print(args=[lhs, inst.target], vararg=None, loc=loc) + print_node = ir.Print( + args=[lhs, inst.target], vararg=None, loc=loc + ) new_block.append(print_node) sig = numba.typing.signature( types.none, typemap[lhs.name], typemap[inst.target.name] @@ -168,7 +173,9 @@ def replace_var_with_array_in_block(vars, block, typemap, calltypes): const_assign = ir.Assign(const_node, const_var, inst.loc) new_block.append(const_assign) - setitem_node = ir.SetItem(inst.target, const_var, inst.value, inst.loc) + setitem_node = ir.SetItem( + inst.target, const_var, inst.value, inst.loc + ) calltypes[setitem_node] = signature( types.none, types.npytypes.Array(typemap[inst.target.name], 1, "C"), @@ -181,7 +188,9 @@ def replace_var_with_array_in_block(vars, block, typemap, calltypes): replace_var_with_array_internal( vars, {0: inst.init_block}, typemap, calltypes ) - replace_var_with_array_internal(vars, inst.loop_body, typemap, calltypes) + replace_var_with_array_internal( + vars, inst.loop_body, typemap, calltypes + ) new_block.append(inst) return new_block @@ -189,7 +198,9 @@ def replace_var_with_array_in_block(vars, block, typemap, calltypes): def replace_var_with_array_internal(vars, loop_body, typemap, calltypes): for label, block in loop_body.items(): - block.body = replace_var_with_array_in_block(vars, block, typemap, calltypes) + block.body = replace_var_with_array_in_block( + vars, block, typemap, calltypes + ) def replace_var_with_array(vars, loop_body, typemap, calltypes): @@ -317,7 +328,9 @@ def _create_gufunc_for_parfor_body( parfor_params.add(stop.name) # Get just the outputs of the parfor. - parfor_outputs = numba.parfors.parfor.get_parfor_outputs(parfor, parfor_params) + parfor_outputs = numba.parfors.parfor.get_parfor_outputs( + parfor, parfor_params + ) # Get all parfor reduction vars, and operators. 
typemap = lowerer.fndesc.typemap @@ -381,7 +394,11 @@ def addrspace_from(params, def_addr): if config.DEBUG_ARRAY_OPT >= 1: print("ind_dict = ", sorted(ind_dict.items()), type(ind_dict)) - print("legal_loop_indices = ", legal_loop_indices, type(legal_loop_indices)) + print( + "legal_loop_indices = ", + legal_loop_indices, + type(legal_loop_indices), + ) for pd in parfor_params: print("pd = ", pd) @@ -490,7 +507,9 @@ def print_arg_with_addrspaces(args): # rename all variables in gufunc_ir afresh var_table = get_name_var_table(gufunc_ir.blocks) new_var_dict = {} - reserved_names = [sentinel_name] + list(param_dict.values()) + legal_loop_indices + reserved_names = ( + [sentinel_name] + list(param_dict.values()) + legal_loop_indices + ) for name, var in var_table.items(): if not (name in reserved_names): new_var_dict[name] = mk_unique_var(name) @@ -507,7 +526,10 @@ def print_arg_with_addrspaces(args): if config.DEBUG_ARRAY_OPT: print( - "gufunc_param_types = ", type(gufunc_param_types), "\n", gufunc_param_types + "gufunc_param_types = ", + type(gufunc_param_types), + "\n", + gufunc_param_types, ) gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1 @@ -541,7 +563,10 @@ def print_arg_with_addrspaces(args): # store hoisted into diagnostics diagnostics = lowerer.metadata["parfor_diagnostics"] - diagnostics.hoist_info[parfor.id] = {"hoisted": hoisted, "not_hoisted": not_hoisted} + diagnostics.hoist_info[parfor.id] = { + "hoisted": hoisted, + "not_hoisted": not_hoisted, + } lowerer.metadata["parfor_diagnostics"].extra_info[str(parfor.id)] = str( dpctl.get_current_queue().get_sycl_device().name @@ -554,7 +579,10 @@ def print_arg_with_addrspaces(args): # Search all the block in the gufunc outline for the sentinel assignment. for label, block in gufunc_ir.blocks.items(): for i, inst in enumerate(block.body): - if isinstance(inst, ir.Assign) and inst.target.name == sentinel_name: + if ( + isinstance(inst, ir.Assign) + and inst.target.name == sentinel_name + ): # We found the sentinel assignment. loc = inst.loc scope = block.scope @@ -581,7 +609,9 @@ def print_arg_with_addrspaces(args): gufunc_ir.blocks[label] = prev_block # Add a jump from the last parfor body block to the block # containing statements after the sentinel. 
- gufunc_ir.blocks[body_last_label].append(ir.Jump(new_label, loc)) + gufunc_ir.blocks[body_last_label].append( + ir.Jump(new_label, loc) + ) break else: continue @@ -772,7 +802,12 @@ def _lower_parfor_gufunc(lowerer, parfor): print("loop_ranges = ", loop_ranges) gu_signature = _create_shape_signature( - parfor.get_shape_classes, num_inputs, func_args, func_sig, parfor.races, typemap + parfor.get_shape_classes, + num_inputs, + func_args, + func_sig, + parfor.races, + typemap, ) generate_kernel_launch_ops( @@ -948,7 +983,9 @@ def val_type_or_none(context, lowerer, x): return None all_llvm_args = [getvar_or_none(lowerer, x) for x in expr_args[:ninouts]] - all_val_types = [val_type_or_none(context, lowerer, x) for x in expr_args[:ninouts]] + all_val_types = [ + val_type_or_none(context, lowerer, x) for x in expr_args[:ninouts] + ] all_args = [loadvar_or_none(lowerer, x) for x in expr_args[:ninouts]] keep_alive_kernels.append(cres) @@ -1128,7 +1165,9 @@ def relatively_deep_copy(obj, memo): return cpy if isinstance(obj, FreeVar): - cpy = FreeVar(index=obj.index, name=obj.name, value=obj.value, loc=obj.loc) + cpy = FreeVar( + index=obj.index, name=obj.name, value=obj.value, loc=obj.loc + ) memo[obj_id] = cpy return cpy @@ -1145,7 +1184,9 @@ def relatively_deep_copy(obj, memo): cpy = copy.copy(obj) cpy.clear() for key, item in obj.items(): - cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy(item, memo) + cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy( + item, memo + ) memo[obj_id] = cpy return cpy elif isinstance(obj, tuple): @@ -1232,9 +1273,13 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None): func_ir_cpu = relatively_deep_copy(func_ir, memo) cpu_context = ( - context.cpu_context if isinstance(context, DPPYTargetContext) else context + context.cpu_context + if isinstance(context, DPPYTargetContext) + else context + ) + self.gpu_lower = self._lower( + context, library, fndesc, func_ir, metadata ) - self.gpu_lower = self._lower(context, library, fndesc, func_ir, metadata) self.cpu_lower = self._lower( cpu_context, library, fndesc_cpu, func_ir_cpu, metadata ) @@ -1305,7 +1350,9 @@ def lower(self): # if lower does not crash, and parfor_diagnostics is empty then it # is a kernel function. if not self.gpu_lower.metadata["parfor_diagnostics"].extra_info: - str_name = dpctl.get_current_queue().get_sycl_device().filter_string + str_name = ( + dpctl.get_current_queue().get_sycl_device().filter_string + ) self.gpu_lower.metadata["parfor_diagnostics"].extra_info[ "kernel" ] = str_name @@ -1325,7 +1372,9 @@ def lower(self): dpctl.get_current_queue().get_sycl_device().filter_string ) print( - "Failed to offload parfor to " + device_filter_str + ". Due to:\n", + "Failed to offload parfor to " + + device_filter_str + + ". Due to:\n", e, ) print(traceback.format_exc()) @@ -1366,7 +1415,9 @@ def lower_parfor_rollback(lowerer, parfor): print(msg, parfor.loc) except Exception as e: - device_filter_str = dpctl.get_current_queue().get_sycl_device().filter_string + device_filter_str = ( + dpctl.get_current_queue().get_sycl_device().filter_string + ) msg = ( "Failed to offload parfor to " + device_filter_str + ". Falling " "back to default CPU parallelization. 
Please file a bug report " diff --git a/numba_dppy/dppy_parfor_diagnostics.py b/numba_dppy/dppy_parfor_diagnostics.py index b6acda7b98..601179071c 100644 --- a/numba_dppy/dppy_parfor_diagnostics.py +++ b/numba_dppy/dppy_parfor_diagnostics.py @@ -75,7 +75,11 @@ def print_g(fadj_, nadj_, nroot, depth): if nadj_[theroot] != []: print_wrapped("Parallel region %s:" % region_id) print_wrapped("%s%s %s" % (sword, theroot, "(parallel)")) - summary[region_id] = {"root": theroot, "fused": 0, "serialized": 0} + summary[region_id] = { + "root": theroot, + "fused": 0, + "serialized": 0, + } print_g(fadj_, nadj_, theroot, 1) print("\n") region_id = region_id + 1 @@ -90,7 +94,11 @@ def print_fuse(ty, pf_id, adj, depth, region_id): msg += ", fused with loop(s): " msg += ", ".join([str(x) for x in fused]) - summary[region_id] = {"root": pf_id, "fused": len(fused), "serialized": 0} + summary[region_id] = { + "root": pf_id, + "fused": len(fused), + "serialized": 0, + } msg += ")" print_wrapped(msg) extra_info = self.extra_info.get(str(region_id)) @@ -115,7 +123,9 @@ def print_fuse(ty, pf_id, adj, depth, region_id): # print the summary of the fuse/serialize rewrite if summary: for k, v in sorted(summary.items()): - msg = "\n \nParallel region %s (loop #%s) had %s " "loop(s) fused" + msg = ( + "\n \nParallel region %s (loop #%s) had %s " "loop(s) fused" + ) root = v["root"] fused = v["fused"] serialized = v["serialized"] diff --git a/numba_dppy/dppy_passbuilder.py b/numba_dppy/dppy_passbuilder.py index 28ed5e13e3..b8308696a4 100644 --- a/numba_dppy/dppy_passbuilder.py +++ b/numba_dppy/dppy_passbuilder.py @@ -84,7 +84,9 @@ def default_numba_nopython_pipeline(state, pm): # inline closures early in case they are using nonlocal's # see issue #6585. - pm.add_pass(InlineClosureLikes, "inline calls to locally defined closures") + pm.add_pass( + InlineClosureLikes, "inline calls to locally defined closures" + ) # pre typing if not state.flags.no_rewrites: @@ -94,7 +96,8 @@ def default_numba_nopython_pipeline(state, pm): # convert any remaining closures into functions pm.add_pass( - MakeFunctionToJitFunction, "convert make_function into JIT functions" + MakeFunctionToJitFunction, + "convert make_function into JIT functions", ) # inline functions that have been determined as inlinable and rerun # branch pruning, this needs to be run after closures are inlined as diff --git a/numba_dppy/dppy_passes.py b/numba_dppy/dppy_passes.py index 64536d36fd..e179da0ea2 100644 --- a/numba_dppy/dppy_passes.py +++ b/numba_dppy/dppy_passes.py @@ -78,7 +78,9 @@ def run_pass(self, state): expr = instr.value if isinstance(expr, ir.Expr): if expr.op == "call": - find_var = block.find_variable_assignment(expr.func.name) + find_var = block.find_variable_assignment( + expr.func.name + ) if find_var is not None: call_node = find_var.value if ( @@ -109,12 +111,16 @@ def run_pass(self, state): error = False # arg can be one constant or a tuple of constant items - arg_type = func_ir.get_definition(arg.name) + arg_type = func_ir.get_definition( + arg.name + ) if isinstance(arg_type, ir.Expr): # we have a tuple for item in arg_type.items: if not isinstance( - func_ir.get_definition(item.name), + func_ir.get_definition( + item.name + ), ir.Const, ): error = True @@ -122,7 +128,9 @@ def run_pass(self, state): else: if not isinstance( - func_ir.get_definition(arg.name), + func_ir.get_definition( + arg.name + ), ir.Const, ): error = True @@ -289,18 +297,22 @@ def run_pass(self, state): ) with fallback_context(state, msg): # Lowering - fndesc = 
funcdesc.PythonFunctionDescriptor.from_specialized_function( - interp, - typemap, - restype, - calltypes, - mangler=targetctx.mangler, - inline=flags.forceinline, - noalias=flags.noalias, + fndesc = ( + funcdesc.PythonFunctionDescriptor.from_specialized_function( + interp, + typemap, + restype, + calltypes, + mangler=targetctx.mangler, + inline=flags.forceinline, + noalias=flags.noalias, + ) ) with targetctx.push_code_library(library): - lower = DPPYLower(targetctx, library, fndesc, interp, metadata=metadata) + lower = DPPYLower( + targetctx, library, fndesc, interp, metadata=metadata + ) lower.lower() if not flags.no_cpython_wrapper: lower.create_cpython_wrapper(flags.release_gil) @@ -312,14 +324,18 @@ def run_pass(self, state): from numba.core.compiler import _LowerResult # TODO: move this if flags.no_compile: - state["cr"] = _LowerResult(fndesc, call_helper, cfunc=None, env=env) + state["cr"] = _LowerResult( + fndesc, call_helper, cfunc=None, env=env + ) else: # Prepare for execution cfunc = targetctx.get_executable(library, fndesc, env) # Insert native function for use by other jitted-functions. # We also register its library to allow for inlining. targetctx.insert_user_function(cfunc, fndesc, [library]) - state["cr"] = _LowerResult(fndesc, call_helper, cfunc=cfunc, env=env) + state["cr"] = _LowerResult( + fndesc, call_helper, cfunc=cfunc, env=env + ) return True diff --git a/numba_dppy/driver/kernel_launch_ops.py b/numba_dppy/driver/kernel_launch_ops.py index 2e998af84b..f99d38cb70 100644 --- a/numba_dppy/driver/kernel_launch_ops.py +++ b/numba_dppy/driver/kernel_launch_ops.py @@ -75,7 +75,8 @@ def get_current_queue(self): """ sycl_queue_val = cgutils.alloca_once( - self.builder, utils.get_llvm_type(context=self.context, type=types.voidptr) + self.builder, + utils.get_llvm_type(context=self.context, type=types.voidptr), ) fn = DpctlCAPIFnBuilder.get_dpctl_queuemgr_get_current_queue( builder=self.builder, context=self.context @@ -148,23 +149,35 @@ def process_kernel_arg( raise NotImplementedError(arg_type, var) storage = cgutils.alloca_once(self.builder, utils.LLVMTypes.int64_t) - self.builder.store(self.context.get_constant(types.int64, 0), storage) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.int64) + self.builder.store( + self.context.get_constant(types.int64, 0), storage + ) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.int64 + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( storage, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) storage = cgutils.alloca_once(self.builder, utils.LLVMTypes.int64_t) - self.builder.store(self.context.get_constant(types.int64, 0), storage) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.int64) + self.builder.store( + self.context.get_constant(types.int64, 0), storage + ) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.int64 + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( storage, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) @@ -178,11 +191,15 @@ def process_kernel_arg( ], ) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.int64) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.int64 + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( array_size_member, - utils.get_llvm_type(context=self.context, 
type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) @@ -196,11 +213,15 @@ def process_kernel_arg( ], ) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.int64) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.int64 + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( item_size_member, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) @@ -235,13 +256,17 @@ def process_kernel_arg( # names are replaced using legalize names, we have to do the same # here for them to match. legal_names = legalize_names([var]) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.voidptr) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.voidptr + ) if isinstance(arg_type, nus.UsmSharedArrayType): self._form_kernel_arg_and_arg_ty( self.builder.bitcast( self.builder.load(data_member), - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) @@ -264,7 +289,9 @@ def process_kernel_arg( # Create void * to hold new USM buffer. buffer_ptr = cgutils.alloca_once( self.builder, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), name=buffer_name, ) # Setup the args to the USM allocator, size and SYCL queue. @@ -273,12 +300,18 @@ def process_kernel_arg( self.builder.load(sycl_queue_val), ] # Call USM shared allocator and store in buffer_ptr. - self.builder.store(self.builder.call(malloc_fn, args), buffer_ptr) + self.builder.store( + self.builder.call(malloc_fn, args), buffer_ptr + ) if legal_names[var] in modified_arrays: - self.write_buffs.append((buffer_ptr, total_size, data_member)) + self.write_buffs.append( + (buffer_ptr, total_size, data_member) + ) else: - self.read_only_buffs.append((buffer_ptr, total_size, data_member)) + self.read_only_buffs.append( + (buffer_ptr, total_size, data_member) + ) # We really need to detect when an array needs to be copied over if index < self.num_inputs: @@ -297,7 +330,9 @@ def process_kernel_arg( self.builder.call(event_wait_fn, [event_ref]) self.builder.call(event_del_fn, [event_ref]) - self._form_kernel_arg_and_arg_ty(self.builder.load(buffer_ptr), ty) + self._form_kernel_arg_and_arg_ty( + self.builder.load(buffer_ptr), ty + ) # Handle shape shape_member = self.builder.gep( @@ -316,11 +351,15 @@ def process_kernel_arg( self.context.get_constant(types.int32, this_dim), ], ) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.int64) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.int64 + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( shape_entry, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) @@ -343,21 +382,29 @@ def process_kernel_arg( ], ) - ty = numba_type_to_dpctl_typenum(context=self.context, type=types.int64) + ty = numba_type_to_dpctl_typenum( + context=self.context, type=types.int64 + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( stride_entry, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) else: - ty = numba_type_to_dpctl_typenum(context=self.context, type=arg_type) + ty = numba_type_to_dpctl_typenum( + context=self.context, 
type=arg_type + ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( llvm_arg, - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), ty, ) @@ -438,7 +485,9 @@ def enqueue_kernel_and_copy_back(self, dim_bounds, sycl_queue_val): self.builder.bitcast(global_range, intp_ptr_t), self.context.get_constant(types.uintp, num_dim), self.builder.bitcast( - utils.create_null_ptr(builder=self.builder, context=self.context), + utils.create_null_ptr( + builder=self.builder, context=self.context + ), utils.get_llvm_type(context=self.context, type=types.voidptr), ), self.context.get_constant(types.uintp, 0), @@ -463,7 +512,9 @@ def enqueue_kernel_and_copy_back(self, dim_bounds, sycl_queue_val): self.builder.load(sycl_queue_val), self.builder.bitcast( self.builder.load(data_member), - utils.get_llvm_type(context=self.context, type=types.voidptr), + utils.get_llvm_type( + context=self.context, type=types.voidptr + ), ), self.builder.load(buffer_ptr), self.builder.load(total_size), diff --git a/numba_dppy/dufunc_inliner.py b/numba_dppy/dufunc_inliner.py index 44d162ba74..3444bedabb 100644 --- a/numba_dppy/dufunc_inliner.py +++ b/numba_dppy/dufunc_inliner.py @@ -31,7 +31,10 @@ def _run_inliner( typingctx, targetctx, ): - from numba.core.inline_closurecall import callee_ir_validator, inline_closure_call + from numba.core.inline_closurecall import ( + callee_ir_validator, + inline_closure_call, + ) # pass is typed so use the callee globals inline_closure_call( diff --git a/numba_dppy/examples/auto_offload_examples/sum-5d.py b/numba_dppy/examples/auto_offload_examples/sum-5d.py index 0800fd5816..96ead3f5aa 100644 --- a/numba_dppy/examples/auto_offload_examples/sum-5d.py +++ b/numba_dppy/examples/auto_offload_examples/sum-5d.py @@ -52,7 +52,14 @@ def main(): for l in range(N): # noqa for m in range(N): if c[i, j, k, l, m] != 2.0: - print("First index not equal to 2.0 was", i, j, k, l, m) + print( + "First index not equal to 2.0 was", + i, + j, + k, + l, + m, + ) break print("Done...") diff --git a/numba_dppy/examples/pairwise_distance.py b/numba_dppy/examples/pairwise_distance.py index 9a644d8bfa..09c8990708 100644 --- a/numba_dppy/examples/pairwise_distance.py +++ b/numba_dppy/examples/pairwise_distance.py @@ -22,7 +22,9 @@ import numba_dppy as dppy -parser = argparse.ArgumentParser(description="Program to compute pairwise distance") +parser = argparse.ArgumentParser( + description="Program to compute pairwise distance" +) parser.add_argument("-n", type=int, default=10, help="Number of points") parser.add_argument("-d", type=int, default=3, help="Dimensions") diff --git a/numba_dppy/examples/sum_ndarray.py b/numba_dppy/examples/sum_ndarray.py index 5d9ebf66ba..a46e1aca44 100644 --- a/numba_dppy/examples/sum_ndarray.py +++ b/numba_dppy/examples/sum_ndarray.py @@ -21,7 +21,11 @@ @dppy.kernel( - access_types={"read_only": ["a", "b"], "write_only": ["c"], "read_write": []} + access_types={ + "read_only": ["a", "b"], + "write_only": ["c"], + "read_write": [], + } ) def data_parallel_sum(a, b, c): i = dppy.get_global_id(0) diff --git a/numba_dppy/examples/sum_reduction_recursive_ocl.py b/numba_dppy/examples/sum_reduction_recursive_ocl.py index 44fee6fb70..38a8b88068 100644 --- a/numba_dppy/examples/sum_reduction_recursive_ocl.py +++ b/numba_dppy/examples/sum_reduction_recursive_ocl.py @@ -108,7 +108,9 @@ def sum_reduce(A): partial_sums.size * partial_sums.dtype.itemsize ) partial_sums_ndarray = np.ndarray( - partial_sums.shape, 
buffer=partial_sums_buf, dtype=partial_sums.dtype + partial_sums.shape, + buffer=partial_sums_buf, + dtype=partial_sums.dtype, ) np.copyto(partial_sums_ndarray, partial_sums) diff --git a/numba_dppy/extended_numba_itanium_mangler.py b/numba_dppy/extended_numba_itanium_mangler.py index 968dc64b6f..dce855a543 100644 --- a/numba_dppy/extended_numba_itanium_mangler.py +++ b/numba_dppy/extended_numba_itanium_mangler.py @@ -38,7 +38,9 @@ def mangle_type_or_value(typ): if isinstance(typ, types.CPointer): rc = "P" if typ.addrspace is not None: - rc += "U" + itanium_mangler.mangle_identifier("AS" + str(typ.addrspace)) + rc += "U" + itanium_mangler.mangle_identifier( + "AS" + str(typ.addrspace) + ) rc += itanium_mangler.mangle_type_or_value(typ.dtype) return rc else: diff --git a/numba_dppy/initialize.py b/numba_dppy/initialize.py index 3b31c7ab01..81bbf0a8e4 100644 --- a/numba_dppy/initialize.py +++ b/numba_dppy/initialize.py @@ -45,11 +45,15 @@ def load_dpctl_sycl_interface(): platform = plt.system() if platform == "Windows": paths = glob.glob( - os.path.join(os.path.dirname(dpctl.__file__), "*DPCTLSyclInterface.dll") + os.path.join( + os.path.dirname(dpctl.__file__), "*DPCTLSyclInterface.dll" + ) ) else: paths = glob.glob( - os.path.join(os.path.dirname(dpctl.__file__), "*DPCTLSyclInterface.so") + os.path.join( + os.path.dirname(dpctl.__file__), "*DPCTLSyclInterface.so" + ) ) if len(paths) == 1: diff --git a/numba_dppy/interop.py b/numba_dppy/interop.py index e30c924135..b9f0410576 100644 --- a/numba_dppy/interop.py +++ b/numba_dppy/interop.py @@ -30,7 +30,9 @@ def asarray(container): try: from dpnp.dpnp_array import dpnp_array - if isinstance(container, dpnp_array) and hasattr(container, "_array_obj"): + if isinstance(container, dpnp_array) and hasattr( + container, "_array_obj" + ): import warnings warnings.warn("asarray() uses internals from dpnp.") @@ -38,4 +40,6 @@ def asarray(container): except: pass - raise NotImplementedError("dpctl asarray() does not support " + type(container)) + raise NotImplementedError( + "dpctl asarray() does not support " + type(container) + ) diff --git a/numba_dppy/numpy_usm_shared.py b/numba_dppy/numpy_usm_shared.py index 2eae35326f..f35ac299a9 100644 --- a/numba_dppy/numpy_usm_shared.py +++ b/numba_dppy/numpy_usm_shared.py @@ -32,7 +32,9 @@ from llvmlite import ir from numba import types from numba.core import cgutils, config, types, typing -from numba.core.datamodel.registry import register_default as register_model_default +from numba.core.datamodel.registry import ( + register_default as register_model_default, +) from numba.core.imputils import builtin_registry as lower_registry from numba.core.overload_glue import _overload_glue from numba.core.pythonapi import box @@ -75,7 +77,10 @@ def dprint(*args): import numba_dppy._usm_shared_allocator_ext # Register the helper function in dppl_rt so that we can insert calls to them via llvmlite. 
-for py_name, c_address in numba_dppy._usm_shared_allocator_ext.c_helpers.items(): +for ( + py_name, + c_address, +) in numba_dppy._usm_shared_allocator_ext.c_helpers.items(): llb.add_symbol(py_name, c_address) @@ -124,7 +129,9 @@ def copy(self, *args, **kwargs): def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method == "__call__": for inp in inputs: - if not isinstance(inp, (UsmSharedArrayType, types.Array, types.Number)): + if not isinstance( + inp, (UsmSharedArrayType, types.Array, types.Number) + ): return None return UsmSharedArrayType @@ -286,7 +293,13 @@ def numba_register_lower_builtin(): for typs, impl in v._BIND_TYPES.items(): ig = (impl, func, typs) - dprint("Numpy lowered registry functions:", impl, func, type(func), typs) + dprint( + "Numpy lowered registry functions:", + impl, + func, + type(func), + typs, + ) # If it is a Numpy function... if isinstance(func, ftype): dprint("is ftype") @@ -309,7 +322,9 @@ def numba_register_lower_builtin(): # this registry contains functions, getattrs, setattrs, casts and constants... for ig in lower_registry.functions: impl, func, types = ig - dprint("Numpy lowered registry functions:", impl, func, type(func), types) + dprint( + "Numpy lowered registry functions:", impl, func, type(func), types + ) # If it is a Numpy function... if isinstance(func, ftype): dprint("is ftype") @@ -334,7 +349,13 @@ def numba_register_lower_builtin(): types_with_usmarray = types_replace_array(types) if UsmSharedArrayType in types_with_usmarray: dprint( - "lower_getattr:", func, type(func), attr, type(attr), types, type(types) + "lower_getattr:", + func, + type(func), + attr, + type(attr), + types, + type(types), ) todo_getattr.append((func, attr, types_with_usmarray)) @@ -343,12 +364,18 @@ def numba_register_lower_builtin(): for impl, func, types in todo + todo_builtin: try: - usmarray_func = eval("dpctl.tensor.numpy_usm_shared." + func.__name__) + usmarray_func = eval( + "dpctl.tensor.numpy_usm_shared." 
+ func.__name__ + ) except: dprint("failed to eval", func.__name__) continue dprint( - "need to re-register lowerer for usmarray", impl, func, types, usmarray_func + "need to re-register lowerer for usmarray", + impl, + func, + types, + usmarray_func, ) new_impl = copy_func_for_usmarray(impl, nus) lower_registry.functions.append((new_impl, usmarray_func, types)) @@ -356,9 +383,13 @@ def numba_register_lower_builtin(): for impl, func, types in todo_array_member_func: types_with_usmarray = types_replace_array(types) usmarray_func = "usm" + func - dprint("Registering lowerer for", impl, usmarray_func, types_with_usmarray) + dprint( + "Registering lowerer for", impl, usmarray_func, types_with_usmarray + ) new_impl = copy_func_for_usmarray(impl, nus) - lower_registry.functions.append((new_impl, usmarray_func, types_with_usmarray)) + lower_registry.functions.append( + (new_impl, usmarray_func, types_with_usmarray) + ) def argspec_to_string(argspec): @@ -441,7 +472,9 @@ def set_key_original(cls, key, original): cls.original = original def generic_impl(self): - original_typer = self.__class__.original.generic(self.__class__.original) + original_typer = self.__class__.original.generic( + self.__class__.original + ) ot_argspec = inspect.getfullargspec(original_typer) astr = argspec_to_string(ot_argspec) @@ -678,7 +711,9 @@ def sentry_shape_scalar(ty): # vararg case if any(not sentry_shape_scalar(a) for a in args): raise TypeError( - "reshape({0}) is not supported".format(", ".join(map(str, args))) + "reshape({0}) is not supported".format( + ", ".join(map(str, args)) + ) ) retty = ary.copy(ndim=len(args)) @@ -707,9 +742,9 @@ def argsort_stub(kind="quicksort"): pass pysig = utils.pysignature(argsort_stub) - sig = signature(UsmSharedArrayType(types.intp, 1, "C"), kind).replace( - pysig=pysig - ) + sig = signature( + UsmSharedArrayType(types.intp, 1, "C"), kind + ).replace(pysig=pysig) return sig @bound_function("usmarray.view") @@ -769,7 +804,9 @@ def resolve_take(self, ary, args, kws): elif isinstance(argty, types.List): # 1d lists only sig = signature(UsmSharedArrayType(ary.dtype, 1, "C"), *args) elif isinstance(argty, types.BaseTuple): - sig = signature(UsmSharedArrayType(ary.dtype, np.ndim(argty), "C"), *args) + sig = signature( + UsmSharedArrayType(ary.dtype, np.ndim(argty), "C"), *args + ) else: raise TypeError("take(%s) not supported for %s" % argty) return sig @@ -785,7 +822,9 @@ def generic_resolve(self, ary, attr): class DparrayAsNdarray(CallableTemplate): def generic(self): def typer(arg): - return types.Array(dtype=arg.dtype, ndim=arg.ndim, layout=arg.layout) + return types.Array( + dtype=arg.dtype, ndim=arg.ndim, layout=arg.layout + ) return typer @@ -794,7 +833,9 @@ def typer(arg): class DparrayFromNdarray(CallableTemplate): def generic(self): def typer(arg): - return UsmSharedArrayType(dtype=arg.dtype, ndim=arg.ndim, layout=arg.layout) + return UsmSharedArrayType( + dtype=arg.dtype, ndim=arg.ndim, layout=arg.layout + ) return typer diff --git a/numba_dppy/ocl/atomics/__init__.py b/numba_dppy/ocl/atomics/__init__.py index b246e32afb..f2821bdff3 100644 --- a/numba_dppy/ocl/atomics/__init__.py +++ b/numba_dppy/ocl/atomics/__init__.py @@ -17,7 +17,9 @@ def atomic_support_present(): - if os.path.isfile(os.path.join(os.path.dirname(__file__), "atomic_ops.spir")): + if os.path.isfile( + os.path.join(os.path.dirname(__file__), "atomic_ops.spir") + ): return True else: return False diff --git a/numba_dppy/ocl/ocldecl.py b/numba_dppy/ocl/ocldecl.py index 50c3241a8e..b32a9373f7 100644 --- 
a/numba_dppy/ocl/ocldecl.py +++ b/numba_dppy/ocl/ocldecl.py @@ -157,7 +157,9 @@ def typer(shape, dtype): if not isinstance(shape, types.IntegerLiteral): return None elif isinstance(shape, (types.Tuple, types.UniTuple)): - if any([not isinstance(s, types.IntegerLiteral) for s in shape]): + if any( + [not isinstance(s, types.IntegerLiteral) for s in shape] + ): return None else: return None diff --git a/numba_dppy/ocl/oclimpl.py b/numba_dppy/ocl/oclimpl.py index 498576ece6..1802399003 100644 --- a/numba_dppy/ocl/oclimpl.py +++ b/numba_dppy/ocl/oclimpl.py @@ -119,7 +119,9 @@ def get_num_groups_impl(context, builder, sig, args): @lower(stubs.get_work_dim) def get_work_dim_impl(context, builder, sig, args): - get_work_dim = _declare_function(context, builder, "get_work_dim", sig, ["void"]) + get_work_dim = _declare_function( + context, builder, "get_work_dim", sig, ["void"] + ) res = builder.call(get_work_dim, []) return res @@ -147,7 +149,9 @@ def get_local_size_impl(context, builder, sig, args): @lower(stubs.barrier, types.uint32) def barrier_one_arg_impl(context, builder, sig, args): [flags] = args - barrier = _declare_function(context, builder, "barrier", sig, ["unsigned int"]) + barrier = _declare_function( + context, builder, "barrier", sig, ["unsigned int"] + ) builder.call(barrier, [flags]) return _void_value @@ -156,7 +160,9 @@ def barrier_one_arg_impl(context, builder, sig, args): def barrier_no_arg_impl(context, builder, sig, args): assert not args sig = types.void(types.uint32) - barrier = _declare_function(context, builder, "barrier", sig, ["unsigned int"]) + barrier = _declare_function( + context, builder, "barrier", sig, ["unsigned int"] + ) flags = context.get_constant(types.uint32, stubs.CLK_GLOBAL_MEM_FENCE) builder.call(barrier, [flags]) return _void_value @@ -165,7 +171,9 @@ def barrier_no_arg_impl(context, builder, sig, args): @lower(stubs.mem_fence, types.uint32) def mem_fence_impl(context, builder, sig, args): [flags] = args - mem_fence = _declare_function(context, builder, "mem_fence", sig, ["unsigned int"]) + mem_fence = _declare_function( + context, builder, "mem_fence", sig, ["unsigned int"] + ) builder.call(mem_fence, [flags]) return _void_value @@ -174,7 +182,9 @@ def mem_fence_impl(context, builder, sig, args): def sub_group_barrier_impl(context, builder, sig, args): assert not args sig = types.void(types.uint32) - barrier = _declare_function(context, builder, "barrier", sig, ["unsigned int"]) + barrier = _declare_function( + context, builder, "barrier", sig, ["unsigned int"] + ) flags = context.get_constant(types.uint32, stubs.CLK_LOCAL_MEM_FENCE) builder.call(barrier, [flags]) return _void_value @@ -203,9 +213,13 @@ def insert_and_call_atomic_fn( elif fn_type == "sub": name = "numba_dppy_atomic_sub_f64" else: - raise TypeError("Operation type is not supported %s" % (fn_type)) + raise TypeError( + "Operation type is not supported %s" % (fn_type) + ) else: - raise TypeError("Atomic operation is not supported for type %s" % (dtype.name)) + raise TypeError( + "Atomic operation is not supported for type %s" % (dtype.name) + ) if addrspace == address_space.LOCAL: name = name + "_local" @@ -244,7 +258,8 @@ def native_atomic_add(context, builder, sig, args): else: indices = cgutils.unpack_tuple(builder, inds, count=len(indty)) indices = [ - context.cast(builder, i, t, types.intp) for t, i in zip(indty, indices) + context.cast(builder, i, t, types.intp) + for t, i in zip(indty, indices) ] if dtype != valty: @@ -286,7 +301,12 @@ def native_atomic_add(context, builder, sig, 
args): numba_ptr_ty = types.CPointer(dtype, addrspace=ptr_type.addrspace) mangled_fn_name = ext_itanium_mangler.mangle( name, - [numba_ptr_ty, "__spv.Scope.Flag", "__spv.MemorySemanticsMask.Flag", valty], + [ + numba_ptr_ty, + "__spv.Scope.Flag", + "__spv.MemorySemanticsMask.Flag", + valty, + ], ) fnty = ir.FunctionType(retty, spirv_fn_arg_types) @@ -317,7 +337,10 @@ def atomic_add_tuple(context, builder, sig, args): dtype = sig.args[0].dtype if dtype == types.float32 or dtype == types.float64: - if device_type == dpctl.device_type.gpu and config.NATIVE_FP_ATOMICS == 1: + if ( + device_type == dpctl.device_type.gpu + and config.NATIVE_FP_ATOMICS == 1 + ): return native_atomic_add(context, builder, sig, args) else: # Currently, DPCPP only supports native floating point @@ -342,9 +365,13 @@ def atomic_sub_wrapper(context, builder, sig, args): ) val_dtype = sig.args[2] if val_dtype == types.float32 or val_dtype == types.float64: - builder.store(builder.fmul(val, context.get_constant(sig.args[2], -1)), new_val) + builder.store( + builder.fmul(val, context.get_constant(sig.args[2], -1)), new_val + ) elif val_dtype == types.int32 or val_dtype == types.int64: - builder.store(builder.mul(val, context.get_constant(sig.args[2], -1)), new_val) + builder.store( + builder.mul(val, context.get_constant(sig.args[2], -1)), new_val + ) else: raise TypeError("Unsupported type %s" % val_dtype) @@ -361,7 +388,10 @@ def atomic_sub_tuple(context, builder, sig, args): dtype = sig.args[0].dtype if dtype == types.float32 or dtype == types.float64: - if device_type == dpctl.device_type.gpu and config.NATIVE_FP_ATOMICS == 1: + if ( + device_type == dpctl.device_type.gpu + and config.NATIVE_FP_ATOMICS == 1 + ): return atomic_sub_wrapper(context, builder, sig, args) else: # Currently, DPCPP only supports native floating point @@ -388,7 +418,8 @@ def atomic_add(context, builder, sig, args, name): else: indices = cgutils.unpack_tuple(builder, inds, count=len(indty)) indices = [ - context.cast(builder, i, t, types.intp) for t, i in zip(indty, indices) + context.cast(builder, i, t, types.intp) + for t, i in zip(indty, indices) ] if dtype != valty: @@ -402,7 +433,10 @@ def atomic_add(context, builder, sig, args, name): lary = context.make_array(aryty)(context, builder, ary) ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices) - if isinstance(aryty, DPPYArray) and aryty.addrspace == address_space.LOCAL: + if ( + isinstance(aryty, DPPYArray) + and aryty.addrspace == address_space.LOCAL + ): return insert_and_call_atomic_fn( context, builder, @@ -425,7 +459,9 @@ def atomic_add(context, builder, sig, args, name): address_space.GLOBAL, ) else: - raise ImportError("Atomic support is not present, can not perform atomic_add") + raise ImportError( + "Atomic support is not present, can not perform atomic_add" + ) @lower(stubs.local.array, types.IntegerLiteral, types.Any) @@ -489,7 +525,9 @@ def _generic_array(context, builder, shape, dtype, symbol_name, addrspace): # memories allocated in local address space to global address space. This # approach does not let us identify the original address space of a memory # down the line. 
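Aside (not part of the patch): the context comment just above refers to casting buffers allocated in the local address space over to the global address space before the array struct is built, which is why the original address space cannot be recovered later. In llvmlite IR the address space is part of the pointer type itself, so such a cast erases that information by construction; a tiny illustration (the numeric address-space IDs follow the usual SPIR/OpenCL convention and are an assumption here, not something this patch defines):

    from llvmlite import ir

    i32 = ir.IntType(32)
    local_ptr_ty = i32.as_pointer(3)   # addrspace(3): work-group local memory
    global_ptr_ty = i32.as_pointer(1)  # addrspace(1): global device memory
    # After an addrspacecast of a value to global_ptr_ty, nothing in the type
    # records that the allocation originally lived in addrspace(3).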
- return _make_array(context, builder, gvmem, dtype, shape, addrspace=addrspace) + return _make_array( + context, builder, gvmem, dtype, shape, addrspace=addrspace + ) def _make_array( diff --git a/numba_dppy/printimpl.py b/numba_dppy/printimpl.py index feb6349324..844b2c6813 100644 --- a/numba_dppy/printimpl.py +++ b/numba_dppy/printimpl.py @@ -38,7 +38,9 @@ def print_item(ty, context, builder, val): A (format string, [list of arguments]) is returned that will allow forming the final printf()-like call. """ - raise NotImplementedError("printing unimplemented for values of type %s" % (ty,)) + raise NotImplementedError( + "printing unimplemented for values of type %s" % (ty,) + ) @print_item.register(types.Integer) diff --git a/numba_dppy/rename_numpy_functions_pass.py b/numba_dppy/rename_numpy_functions_pass.py index 0ecbeac429..73d60a2c66 100644 --- a/numba_dppy/rename_numpy_functions_pass.py +++ b/numba_dppy/rename_numpy_functions_pass.py @@ -101,7 +101,9 @@ class RewriteNumPyOverloadedFunctions(object): - def __init__(self, state, rewrite_function_name_map=rewrite_function_name_map): + def __init__( + self, state, rewrite_function_name_map=rewrite_function_name_map + ): self.state = state self.function_name_map = rewrite_function_name_map @@ -143,12 +145,17 @@ def run(self): saved_arr_arg = {} new_body = [] for stmt in block.body: - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): + if isinstance(stmt, ir.Assign) and isinstance( + stmt.value, ir.Expr + ): lhs = stmt.target.name rhs = stmt.value # replace np.FOO with name from self.function_name_map["FOO"] # e.g. np.sum will be replaced with numba_dppy.dpnp.sum - if rhs.op == "getattr" and rhs.attr in self.function_name_map: + if ( + rhs.op == "getattr" + and rhs.attr in self.function_name_map + ): module_node = block.find_variable_assignment( rhs.value.name ).value @@ -158,7 +165,8 @@ def run(self): in self.function_name_map[rhs.attr][0] ) or ( isinstance(module_node, ir.Expr) - and module_node.attr in self.function_name_map[rhs.attr][0] + and module_node.attr + in self.function_name_map[rhs.attr][0] ): rhs = stmt.value rhs.attr = self.function_name_map[rhs.attr][1] @@ -180,8 +188,12 @@ def run(self): g_dppy = ir.Global("numba_dppy", numba_dppy, loc) g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc) - dpnp_var = ir.Var(scope, mk_unique_var("$4load_attr"), loc) - getattr_dpnp = ir.Expr.getattr(g_dppy_var, "dpnp", loc) + dpnp_var = ir.Var( + scope, mk_unique_var("$4load_attr"), loc + ) + getattr_dpnp = ir.Expr.getattr( + g_dppy_var, "dpnp", loc + ) dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc) rhs.value = dpnp_var @@ -224,7 +236,9 @@ def run_pass(self, state): mutated = rewrite_function_name_pass.run() if mutated: - remove_dead(state.func_ir.blocks, state.func_ir.arg_names, state.func_ir) + remove_dead( + state.func_ir.blocks, state.func_ir.arg_names, state.func_ir + ) state.func_ir.blocks = simplify_CFG(state.func_ir.blocks) return mutated @@ -240,7 +254,9 @@ def get_dpnp_func_typ(func): class RewriteNdarrayFunctions(object): - def __init__(self, state, rewrite_function_name_map=rewrite_function_name_map): + def __init__( + self, state, rewrite_function_name_map=rewrite_function_name_map + ): self.state = state self.function_name_map = rewrite_function_name_map self.typemap = state.type_annotation.typemap @@ -261,7 +277,9 @@ def run(self): block = blocks[label] new_body = [] for stmt in block.body: - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): + if isinstance(stmt, ir.Assign) and 
isinstance( + stmt.value, ir.Expr + ): lhs = stmt.target.name rhs = stmt.value # replace A.func with np.func, and save A in saved_arr_arg @@ -278,13 +296,21 @@ def run(self): scope = arr.scope loc = arr.loc - g_dppy_var = ir.Var(scope, mk_unique_var("$load_global"), loc) - self.typemap[g_dppy_var.name] = types.misc.Module(numba_dppy) + g_dppy_var = ir.Var( + scope, mk_unique_var("$load_global"), loc + ) + self.typemap[g_dppy_var.name] = types.misc.Module( + numba_dppy + ) g_dppy = ir.Global("numba_dppy", numba_dppy, loc) g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc) - dpnp_var = ir.Var(scope, mk_unique_var("$load_attr"), loc) - self.typemap[dpnp_var.name] = types.misc.Module(numba_dppy.dpnp) + dpnp_var = ir.Var( + scope, mk_unique_var("$load_attr"), loc + ) + self.typemap[dpnp_var.name] = types.misc.Module( + numba_dppy.dpnp + ) getattr_dpnp = ir.Expr.getattr(g_dppy_var, "dpnp", loc) dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc) @@ -311,9 +337,15 @@ def run(self): # argsort requires kws for typing so sig.args can't be used # reusing sig.args since some types become Const in sig argtyps = old_sig.args[: len(rhs.args)] - kwtyps = {name: self.typemap[v.name] for name, v in rhs.kws} - self.calltypes[rhs] = self.typemap[rhs.func.name].get_call_type( - typingctx, [self.typemap[arr.name]] + list(argtyps), kwtyps + kwtyps = { + name: self.typemap[v.name] for name, v in rhs.kws + } + self.calltypes[rhs] = self.typemap[ + rhs.func.name + ].get_call_type( + typingctx, + [self.typemap[arr.name]] + list(argtyps), + kwtyps, ) rhs.args = [arr] + rhs.args @@ -337,7 +369,9 @@ def run_pass(self, state): mutated = rewrite_ndarray_function_name_pass.run() if mutated: - remove_dead(state.func_ir.blocks, state.func_ir.arg_names, state.func_ir) + remove_dead( + state.func_ir.blocks, state.func_ir.arg_names, state.func_ir + ) state.func_ir.blocks = simplify_CFG(state.func_ir.blocks) return mutated diff --git a/numba_dppy/spirv_generator.py b/numba_dppy/spirv_generator.py index 316493568f..67c9492c0e 100644 --- a/numba_dppy/spirv_generator.py +++ b/numba_dppy/spirv_generator.py @@ -138,7 +138,9 @@ def _create_temp_file(self, name, mode="wb"): return fobj, path def _track_temp_file(self, name): - path = os.path.join(self._tmpdir, "{0}-{1}".format(len(self._tempfiles), name)) + path = os.path.join( + self._tmpdir, "{0}-{1}".format(len(self._tempfiles), name) + ) self._tempfiles.append(path) return path @@ -189,7 +191,9 @@ def finalize(self): print("".center(80, "=")) self._cmd.generate( - llvm_spirv_args=llvm_spirv_args, ipath=self._llvmfile, opath=spirv_path + llvm_spirv_args=llvm_spirv_args, + ipath=self._llvmfile, + opath=spirv_path, ) if len(binary_paths) > 1: diff --git a/numba_dppy/target.py b/numba_dppy/target.py index 7a26cd325c..a44c0e4b3c 100644 --- a/numba_dppy/target.py +++ b/numba_dppy/target.py @@ -27,7 +27,11 @@ from numba.core.utils import cached_property from numba_dppy.dppy_array_type import DPPYArray, DPPYArrayModel -from numba_dppy.utils import address_space, calling_conv, npytypes_array_to_dppy_array +from numba_dppy.utils import ( + address_space, + calling_conv, + npytypes_array_to_dppy_array, +) from . 
import codegen @@ -90,7 +94,9 @@ def load_additional_registries(self): class GenericPointerModel(datamodel.PrimitiveModel): def __init__(self, dmm, fe_type): adrsp = ( - fe_type.addrspace if fe_type.addrspace is not None else address_space.GLOBAL + fe_type.addrspace + if fe_type.addrspace is not None + else address_space.GLOBAL ) be_type = dmm.lookup(fe_type.dtype).get_data_type().as_pointer(adrsp) super(GenericPointerModel, self).__init__(dmm, fe_type, be_type) @@ -324,8 +330,13 @@ def replace_numpy_ufunc_with_opencl_supported_functions(self): for name, ufunc in ufuncs: for sig in self.ufunc_db[ufunc].keys(): - if sig in sig_mapper and (name, sig_mapper[sig]) in lower_ocl_impl: - self.ufunc_db[ufunc][sig] = lower_ocl_impl[(name, sig_mapper[sig])] + if ( + sig in sig_mapper + and (name, sig_mapper[sig]) in lower_ocl_impl + ): + self.ufunc_db[ufunc][sig] = lower_ocl_impl[ + (name, sig_mapper[sig]) + ] def load_additional_registries(self): """Register OpenCL functions into numba-dppy's target context. diff --git a/numba_dppy/tests/_helper.py b/numba_dppy/tests/_helper.py index 3d88d20c3c..4b6fe1fe72 100644 --- a/numba_dppy/tests/_helper.py +++ b/numba_dppy/tests/_helper.py @@ -139,7 +139,9 @@ def assert_dpnp_implementaion(): with captured_stdout() as stdout, dpnp_debug(): yield - assert "dpnp implementation" in stdout.getvalue(), "dpnp implementation is not used" + assert ( + "dpnp implementation" in stdout.getvalue() + ), "dpnp implementation is not used" @contextlib.contextmanager diff --git a/numba_dppy/tests/kernel_tests/test_atomic_op.py b/numba_dppy/tests/kernel_tests/test_atomic_op.py index 431853155a..bf853c95dd 100644 --- a/numba_dppy/tests/kernel_tests/test_atomic_op.py +++ b/numba_dppy/tests/kernel_tests/test_atomic_op.py @@ -192,7 +192,9 @@ def addrspace(request): def test_atomic_fp_native(filter_str, return_list_of_op, fdtype, addrspace): LLVM_SPIRV_ROOT = os.environ.get("NUMBA_DPPY_LLVM_SPIRV_ROOT") if LLVM_SPIRV_ROOT == "" or LLVM_SPIRV_ROOT is None: - pytest.skip("Please set envar NUMBA_DPPY_LLVM_SPIRV_ROOT to run this test") + pytest.skip( + "Please set envar NUMBA_DPPY_LLVM_SPIRV_ROOT to run this test" + ) if atomic_skip_test(filter_str): pytest.skip() diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py index 56cd4ffdfe..7e54cfae71 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py @@ -84,7 +84,10 @@ def get_shape(request): @pytest.fixture(params=list_of_unary_ops) def unary_op(request): - return wrapper_function("a", f"a.{request.param}()", globals()), request.param + return ( + wrapper_function("a", f"a.{request.param}()", globals()), + request.param, + ) def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd): @@ -96,11 +99,15 @@ def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd): if name != "argsort" and name != "copy": a = np.reshape(a, get_shape) if name == "cumprod" and ( - filter_str == "opencl:cpu:0" or a.dtype == np.int32 or is_gen12(filter_str) + filter_str == "opencl:cpu:0" + or a.dtype == np.int32 + or is_gen12(filter_str) ): pytest.skip() if name == "cumsum" and ( - filter_str == "opencl:cpu:0" or a.dtype == np.int32 or is_gen12(filter_str) + filter_str == "opencl:cpu:0" + or a.dtype == np.int32 + or is_gen12(filter_str) ): pytest.skip() if name == "mean" and is_gen12(filter_str): diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_indexing.py 
b/numba_dppy/tests/njit_tests/dpnp/test_numpy_indexing.py index cda3ea240a..e448e8b3c7 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_indexing.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_indexing.py @@ -48,7 +48,10 @@ def filter_str(request): [[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], - [[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], [ [[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]], diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_linalg.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_linalg.py index 57f93d0cfd..26efe3f584 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_linalg.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_linalg.py @@ -36,7 +36,9 @@ filter_strings_with_skips_for_opencl = [ "level_zero:gpu:0", pytest.param("opencl:gpu:0", marks=pytest.mark.skip(reason="Freeze")), - pytest.param("opencl:cpu:0", marks=pytest.mark.skip(reason="Segmentation fault")), + pytest.param( + "opencl:cpu:0", marks=pytest.mark.skip(reason="Segmentation fault") + ), # pytest.param("opencl:cpu:0", marks=pytest.mark.xfail(reason="Segmentation fault")), # run with --boxed ] @@ -322,7 +324,8 @@ def test_matrix_power(filter_str, matrix_power_input, power, dtype, capfd): "matrix_rank_input", [ pytest.param( - np.eye(4), marks=pytest.mark.xfail(reason="dpnp does not support it yet") + np.eye(4), + marks=pytest.mark.xfail(reason="dpnp does not support it yet"), ), np.ones((4,)), pytest.param( diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_rng.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_rng.py index 9f2f05d1a6..ba73f3e953 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_rng.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_rng.py @@ -126,7 +126,9 @@ def test_two_arg_fn(filter_str, two_arg_fn, unary_size, capfd): op_name, first_arg, low, high = two_arg_fn if op_name == "gamma": - pytest.skip("AttributeError: 'NoneType' object has no attribute 'ravel'") + pytest.skip( + "AttributeError: 'NoneType' object has no attribute 'ravel'" + ) op = get_two_arg_fn(op_name) f = njit(op) device = dpctl.SyclDevice(filter_str) @@ -165,7 +167,9 @@ def three_arg_fn(request): def get_three_arg_fn(op_name): - return wrapper_function("a, b, c", f"np.random.{op_name}(a, b, c)", globals()) + return wrapper_function( + "a, b, c", f"np.random.{op_name}(a, b, c)", globals() + ) def test_three_arg_fn(filter_str, three_arg_fn, three_arg_size, capfd): diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_sort_search_count.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_sort_search_count.py index 426982bd42..fecc2bd7df 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_sort_search_count.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_sort_search_count.py @@ -77,7 +77,10 @@ def get_shape(request): @pytest.fixture(params=list_of_unary_ops) def unary_op(request): - return wrapper_function("a", f"np.{request.param}(a)", globals()), request.param + return ( + wrapper_function("a", f"np.{request.param}(a)", globals()), + request.param, + ) def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd): @@ -111,7 +114,10 @@ def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd): [[3, 2], [1, 6]], [[4, 2, 3], [3, 4, 1]], [[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]], - [[[[8, 2], 
[3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]], + [ + [[[8, 2], [3, 0]], [[5, 2], [0, 1]]], + [[[1, 3], [3, 1]], [[5, 2], [0, 1]]], + ], ], ids=[ "[3, 4, 2, 1]", diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_statistics.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_statistics.py index 3b6f5c128a..7190a1b976 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_statistics.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_statistics.py @@ -82,7 +82,10 @@ def get_shape(request): @pytest.fixture(params=list_of_unary_ops) def unary_op(request): - return wrapper_function("a", f"np.{request.param}(a)", globals()), request.param + return ( + wrapper_function("a", f"np.{request.param}(a)", globals()), + request.param, + ) @pytest.mark.parametrize("filter_str", filter_strings_with_skips_for_opencl) diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_transcendentals.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_transcendentals.py index 75f72d1227..1192dff796 100644 --- a/numba_dppy/tests/njit_tests/dpnp/test_numpy_transcendentals.py +++ b/numba_dppy/tests/njit_tests/dpnp/test_numpy_transcendentals.py @@ -136,7 +136,9 @@ def test_unary_ops(filter_str, unary_op, input_array, get_shape, capfd): assert max_abs_err < 1e-4 -def test_unary_nan_ops(filter_str, unary_nan_op, input_nan_array, get_shape, capfd): +def test_unary_nan_ops( + filter_str, unary_nan_op, input_nan_array, get_shape, capfd +): if skip_test(filter_str): pytest.skip() diff --git a/numba_dppy/tests/test_array_utils.py b/numba_dppy/tests/test_array_utils.py index 5534a39e2a..dc7b430ada 100644 --- a/numba_dppy/tests/test_array_utils.py +++ b/numba_dppy/tests/test_array_utils.py @@ -20,7 +20,11 @@ import pytest from numba_dppy.tests._helper import skip_test -from numba_dppy.utils import as_usm_obj, copy_to_numpy_from_usm_obj, has_usm_memory +from numba_dppy.utils import ( + as_usm_obj, + copy_to_numpy_from_usm_obj, + has_usm_memory, +) from . 
import _helper diff --git a/numba_dppy/tests/test_black_scholes.py b/numba_dppy/tests/test_black_scholes.py index 3683ee61ed..14f492a419 100644 --- a/numba_dppy/tests/test_black_scholes.py +++ b/numba_dppy/tests/test_black_scholes.py @@ -44,7 +44,13 @@ def cnd(d): def black_scholes( - callResult, putResult, stockPrice, optionStrike, optionYears, Riskfree, Volatility + callResult, + putResult, + stockPrice, + optionStrike, + optionYears, + Riskfree, + Volatility, ): S = stockPrice X = optionStrike @@ -101,7 +107,9 @@ def black_scholes_dppy(callResult, putResult, S, X, T, R, V): if i >= S.shape[0]: return sqrtT = math.sqrt(T[i]) - d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT) + d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / ( + V * sqrtT + ) d2 = d1 - V * sqrtT K = 1.0 / (1.0 + 0.2316419 * math.fabs(d1)) diff --git a/numba_dppy/tests/test_debug_dppy_numba.py b/numba_dppy/tests/test_debug_dppy_numba.py index b08d2df933..4118f8aba1 100644 --- a/numba_dppy/tests/test_debug_dppy_numba.py +++ b/numba_dppy/tests/test_debug_dppy_numba.py @@ -45,7 +45,9 @@ def spawn(self): env["NUMBA_OPT"] = "0" env["NUMBA_EXTEND_VARIABLE_LIFETIMES"] = "1" - self.child = pexpect.spawn("gdb-oneapi -q python", env=env, encoding="utf-8") + self.child = pexpect.spawn( + "gdb-oneapi -q python", env=env, encoding="utf-8" + ) if config.DEBUG: self.child.logfile = sys.stdout diff --git a/numba_dppy/tests/test_debuginfo.py b/numba_dppy/tests/test_debuginfo.py index 84420d2e51..ea083d759d 100644 --- a/numba_dppy/tests/test_debuginfo.py +++ b/numba_dppy/tests/test_debuginfo.py @@ -33,7 +33,9 @@ def debug_option(request): def get_kernel_ir(sycl_queue, fn, sig, debug=None): - kernel = compiler.compile_kernel(sycl_queue, fn.py_func, sig, None, debug=debug) + kernel = compiler.compile_kernel( + sycl_queue, fn.py_func, sig, None, debug=debug + ) return kernel.assembly @@ -112,7 +114,10 @@ def foo(arr): local_d = 9 * 99 + 5 arr[index] = local_d + 100 - ir_tags = ['!DILocalVariable(name: "index"', '!DILocalVariable(name: "local_d"'] + ir_tags = [ + '!DILocalVariable(name: "index"', + '!DILocalVariable(name: "local_d"', + ] sycl_queue = dpctl.get_current_queue() sig = (npytypes_array_to_dppy_array(types.float32[:]),) @@ -150,7 +155,9 @@ def data_parallel_sum(a, b, c): npytypes_array_to_dppy_array(types.float32[:]), ) - kernel_ir = get_kernel_ir(sycl_queue, data_parallel_sum, sig, debug=debug_option) + kernel_ir = get_kernel_ir( + sycl_queue, data_parallel_sum, sig, debug=debug_option + ) for tag in ir_tags: assert debug_option == make_check(kernel_ir, tag) diff --git a/numba_dppy/tests/test_itanium_mangler_extension.py b/numba_dppy/tests/test_itanium_mangler_extension.py index 56d67748df..d3bbe8c6d2 100644 --- a/numba_dppy/tests/test_itanium_mangler_extension.py +++ b/numba_dppy/tests/test_itanium_mangler_extension.py @@ -58,6 +58,8 @@ def test_mangling_arg_type(dtypes): def test_mangling_arg_type_2(dtypes, addrspaces): dtype, expected_dtype_str = dtypes addrspace, expected_addrspace_str = addrspaces - got = itanium_mangler.mangle_type(types.CPointer(dtype, addrspace=addrspace)) + got = itanium_mangler.mangle_type( + types.CPointer(dtype, addrspace=addrspace) + ) expected = "PU" + expected_addrspace_str + expected_dtype_str assert got == expected diff --git a/numba_dppy/tests/test_prange.py b/numba_dppy/tests/test_prange.py index 4df709c14c..1f6761d2dc 100644 --- a/numba_dppy/tests/test_prange.py +++ b/numba_dppy/tests/test_prange.py @@ -85,9 +85,9 @@ def f(a, b): b = np.ones((m, n)) device = 
dpctl.SyclDevice("opencl:gpu") - with assert_auto_offloading(parfor_offloaded=2), dppy.offload_to_sycl_device( - device - ): + with assert_auto_offloading( + parfor_offloaded=2 + ), dppy.offload_to_sycl_device(device): f(a, b) self.assertTrue(np.all(b == 10)) @@ -112,9 +112,9 @@ def f(a, b): b = np.ones((m, n, o)) device = dpctl.SyclDevice("opencl:gpu") - with assert_auto_offloading(parfor_offloaded=1), dppy.offload_to_sycl_device( - device - ): + with assert_auto_offloading( + parfor_offloaded=1 + ), dppy.offload_to_sycl_device(device): f(a, b) self.assertTrue(np.all(b == 12)) @@ -134,9 +134,9 @@ def prange_example(): jitted = njit(prange_example) device = dpctl.SyclDevice("opencl:gpu") - with assert_auto_offloading(parfor_offloaded=2), dppy.offload_to_sycl_device( - device - ): + with assert_auto_offloading( + parfor_offloaded=2 + ), dppy.offload_to_sycl_device(device): jitted_res = jitted() res = prange_example() @@ -158,9 +158,9 @@ def prange_example(): jitted = njit(prange_example) device = dpctl.SyclDevice("opencl:gpu") - with assert_auto_offloading(parfor_offloaded=2), dppy.offload_to_sycl_device( - device - ): + with assert_auto_offloading( + parfor_offloaded=2 + ), dppy.offload_to_sycl_device(device): jitted_res = jitted() res = prange_example() diff --git a/numba_dppy/tests/test_with_context.py b/numba_dppy/tests/test_with_context.py index b9cd15730b..786a50267a 100644 --- a/numba_dppy/tests/test_with_context.py +++ b/numba_dppy/tests/test_with_context.py @@ -51,7 +51,9 @@ def func(b): func(expected) np.testing.assert_array_equal(expected, got_gpu) - self.assertTrue("Parfor offloaded to opencl:gpu" in got_gpu_message.getvalue()) + self.assertTrue( + "Parfor offloaded to opencl:gpu" in got_gpu_message.getvalue() + ) @unittest.skipIf(not _helper.has_cpu_queues(), "No CPU platforms available") def test_with_dppy_context_cpu(self): @@ -77,7 +79,9 @@ def func(b): func(expected) np.testing.assert_array_equal(expected, got_cpu) - self.assertTrue("Parfor offloaded to opencl:cpu" in got_cpu_message.getvalue()) + self.assertTrue( + "Parfor offloaded to opencl:cpu" in got_cpu_message.getvalue() + ) if __name__ == "__main__": diff --git a/numba_dppy/utils/array_utils.py b/numba_dppy/utils/array_utils.py index 42dada28be..3c39e452b0 100644 --- a/numba_dppy/utils/array_utils.py +++ b/numba_dppy/utils/array_utils.py @@ -102,7 +102,9 @@ def copy_from_numpy_to_usm_obj(usm_allocated, obj): ) if not obj.flags.c_contiguous: - raise ValueError("Only C-contiguous numpy.ndarray is currently supported!") + raise ValueError( + "Only C-contiguous numpy.ndarray is currently supported!" 
+ ) size = np.prod(obj.shape) if usm_mem.size != (obj.dtype.itemsize * size): @@ -219,11 +221,17 @@ def as_usm_obj(obj, queue=None, usm_type="shared", copy=True): size = np.prod(obj.shape) if usm_type == "shared": - usm_mem = dpctl_mem.MemoryUSMShared(size * obj.dtype.itemsize, queue=queue) + usm_mem = dpctl_mem.MemoryUSMShared( + size * obj.dtype.itemsize, queue=queue + ) elif usm_type == "device": - usm_mem = dpctl_mem.MemoryUSMDevice(size * obj.dtype.itemsize, queue=queue) + usm_mem = dpctl_mem.MemoryUSMDevice( + size * obj.dtype.itemsize, queue=queue + ) elif usm_type == "host": - usm_mem = dpctl_mem.MemoryUSMHost(size * obj.dtype.itemsize, queue=queue) + usm_mem = dpctl_mem.MemoryUSMHost( + size * obj.dtype.itemsize, queue=queue + ) else: raise ValueError( "Supported usm_type are: 'shared', " diff --git a/numba_dppy/vectorizers.py b/numba_dppy/vectorizers.py index d420ba0bb3..0ad1edea58 100644 --- a/numba_dppy/vectorizers.py +++ b/numba_dppy/vectorizers.py @@ -21,7 +21,11 @@ from numba.np.ufunc import deviceufunc import numba_dppy as dppy -from numba_dppy.utils import as_usm_obj, copy_to_numpy_from_usm_obj, has_usm_memory +from numba_dppy.utils import ( + as_usm_obj, + copy_to_numpy_from_usm_obj, + has_usm_memory, +) vectorizer_stager_source = """ def __vectorized_{name}({args}, __out__): diff --git a/pyproject.toml b/pyproject.toml index 43c33e24dc..915b93ebe2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,40 @@ [tool.black] -exclude = 'versioneer.py' +exclude = "versioneer.py|numba_dppy/_version.py" +line-length = 80 [tool.isort] -profile = "black" +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 80 +skip = ["versioneer.py", "numba_dppy/_version.py"] + +[tool.coverage.run] +branch = true +source = [ + "numba_dppy" +] +omit = [ + "numba_dppy/tests/*", + "numba_dppy/_version.py", +] +concurrency = [ + "multiprocessing", +] +parallel = true + +[tool.coverage.report] +precision = 2 +omit = [ + "numba_dppy/tests/*", + "numba_dppy/_version.py", +] +exclude_lines = [ + "pragma: no cover", + "raise NotImplementedError", + "if config.DEBUG:", + "@register_jitable", + "def dpnp_impl", +] diff --git a/scripts/set_examples_to_doc.py b/scripts/set_examples_to_doc.py index f0f42be389..79c53c3936 100644 --- a/scripts/set_examples_to_doc.py +++ b/scripts/set_examples_to_doc.py @@ -27,7 +27,9 @@ def convert_commands_to_docs(): line = line.replace("# Run:", "$") words = line.split() for i in range(len(words)): - if words[i] == "-command" or words[i].startswith("commands"): + if words[i] == "-command" or words[i].startswith( + "commands" + ): words[i] = "" line = " ".join(words) line = " ".join(line.split()) + "\n"
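Aside (not part of the patch): the pyproject.toml hunk above carries the configuration that drives the mechanical re-wraps in this change set: black's line-length is set to 80, isort's profile = "black" shorthand is replaced by the explicit settings it implies, pinned to the same 80-column limit, and [tool.coverage.run] / [tool.coverage.report] sections configure coverage from pyproject.toml as well. Assuming the tools are installed, black --check ., isort --check-only . and coverage run -m pytest should pick these settings up from pyproject.toml without extra flags (coverage needs TOML support, which recent releases include).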