diff --git a/numba/openmp.py b/numba/openmp.py index cd36a9231..5c31ff5d5 100644 --- a/numba/openmp.py +++ b/numba/openmp.py @@ -330,7 +330,7 @@ def arg_to_str(self, x, lowerer, struct_lower=False, var_table=None, gen_copy=Fa elif isinstance(arg_str, lir.instructions.AllocaInstr): decl = arg_str.get_decl() else: - breakpoint() + assert False, f"Don't know how to get decl string for variable {arg_str} of type {type(arg_str)}" if struct_lower and isinstance(xtyp, types.npytypes.Array): dm = lowerer.context.data_model_manager.lookup(xtyp) @@ -3550,9 +3550,7 @@ def _get_loop_kind(func_var, call_table): if len(call) == 0: return False - return call[0] # or call[0] == prange - #or call[0] == 'internal_prange' or call[0] == internal_prange - #$or call[0] == 'pndindex' or call[0] == pndindex) + return call[0] loop = loops[0] entry = list(loop.entries)[0] @@ -3744,19 +3742,18 @@ def _get_loop_kind(func_var, call_table): size_var = range_args[1] try: step = self.func_ir.get_definition(range_args[2]) + # Only use get_definition to get a const if + # available. Otherwise use the variable. + if not isinstance(step, (int, ir.Const)): + step = range_args[2] except KeyError: - raise NotImplementedError( - "Only known step size is supported for prange") - if not isinstance(step, ir.Const): - raise NotImplementedError( - "Only constant step size is supported for prange") - step = step.value -# if step != 1: -# print("unsupported step:", step, type(step)) -# raise NotImplementedError( -# "Only constant step size of 1 is supported for prange") - - #assert(start == 0 or (isinstance(start, ir.Const) and start.value == 0)) + # If there is more than one definition possible for the + # step variable then just use the variable and don't try + # to convert to a const. + step = range_args[2] + if isinstance(step, ir.Const): + step = step.value + if config.DEBUG_OPENMP >= 1: print("size_var:", size_var, type(size_var)) @@ -3848,7 +3845,15 @@ def _get_loop_kind(func_var, call_table): detect_step_assign = ir.Assign(ir.Const(0, inst.loc), step_var, inst.loc) after_start.append(detect_step_assign) - step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc) + if isinstance(step, int): + step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc) + elif isinstance(step, ir.Var): + step_assign = ir.Assign(step, step_var, inst.loc) + start_tags.append(openmp_tag("QUAL.OMP.FIRSTPRIVATE", step.name)) + else: + print("Unsupported step:", step, type(step)) + raise NotImplementedError( + f"Unknown step type that isn't a constant or variable but {type(step)} instead.") scale_var = loop_index.scope.redefine("$scale", inst.loc) fake_iternext = ir.Assign(ir.Const(0, inst.loc), iternext_inst.target, inst.loc) fake_second = ir.Assign(ir.Const(0, inst.loc), pair_second_inst.target, inst.loc) @@ -4606,9 +4611,7 @@ def some_data_clause_directive(self, args, start_tags, end_tags, lexer_count, ha end_tags, scope) vars_in_explicit_clauses, explicit_privates, non_user_explicits = self.get_explicit_vars(clauses) - found_loop, blocks_for_io, blocks_in_region, entry_pred, exit_block, inst, size_var, step_var, latest_index, loop_index = prepare_out - assert(found_loop) else: blocks_for_io = self.body_blocks @@ -6363,7 +6366,6 @@ def omp_shared_array(size, dtype): @overload(omp_shared_array, target='cpu', inline='always', prefer_literal=True) def omp_shared_array_overload(size, dtype): - breakpoint() assert isinstance(size, types.IntegerLiteral) def impl(size, dtype): return np.empty(size, dtype=dtype) @@ -6371,7 +6373,6 @@ def impl(size, dtype): @overload(omp_shared_array, target='cuda', inline='always', prefer_literal=True) def omp_shared_array_overload(size, dtype): - breakpoint() assert isinstance(size, types.IntegerLiteral) def impl(size, dtype): return numba_cuda.shared.array(size, dtype) diff --git a/numba/tests/test_openmp.py b/numba/tests/test_openmp.py index 9e2f40e85..aaac7b9c7 100644 --- a/numba/tests/test_openmp.py +++ b/numba/tests/test_openmp.py @@ -606,12 +606,33 @@ def test_parallel_for_range_step_2(self): def test_impl(N): a = np.zeros(N, dtype=np.int32) with openmp("parallel for"): - for i in range(0, 10, 2): + for i in range(0, len(a), 2): a[i] = i + 1 return a self.check(test_impl, 12) + def test_parallel_for_range_step_arg(self): + def test_impl(N, step): + a = np.zeros(N, dtype=np.int32) + with openmp("parallel for"): + for i in range(0, len(a), step): + a[i] = i + 1 + + return a + self.check(test_impl, 12, 2) + + def test_parallel_for_incremented_step(self): + @njit + def test_impl(v, n): + for i in range(n): + with openmp("parallel for"): + for j in range(0, len(v), i + 1): + v[j] = i + 1 + return v + + self.check(test_impl, np.zeros(100), 3) + def test_parallel_for_range_backward_step(self): def test_impl(N): a = np.zeros(N, dtype=np.int32) @@ -1844,19 +1865,6 @@ def test_impl(): test_impl() self.assertIn("Extra code near line", str(raises.exception)) - def test_parallel_for_incremented_step(self): - @njit - def test_impl(v, n): - for i in range(n): - with openmp("parallel for"): - for j in range(0, len(v), i): - v[j] = i - return v - - with self.assertRaises(NotImplementedError) as raises: - test_impl(np.zeros(100), 3) - self.assertIn("Only constant step", str(raises.exception)) - def test_nonstring_var_omp_statement(self): @njit def test_impl(v): @@ -3350,6 +3358,41 @@ def test_impl(): r = test_impl() np.testing.assert_equal(r, np.full(32, 1)) + def target_parallel_for_range_step_arg(self, device): + target_pragma = f"target device({device}) map(tofrom: a)" + parallel_pragma = "parallel for" + N = 10 + step = 2 + @njit + def test_impl(): + a = np.zeros(N, dtype=np.int32) + with openmp(target_pragma): + with openmp(parallel_pragma): + for i in range(0, len(a), step): + a[i] = i + 1 + + return a + r = test_impl() + np.testing.assert_equal(r, np.array([1,0,3,0,5,0,7,0,9,0])) + + def target_parallel_for_incremented_step(self, device): + target_pragma = f"target device({device}) map(tofrom: a)" + parallel_pragma = "parallel for" + N = 10 + step_range = 3 + @njit + def test_impl(): + a = np.zeros(N, dtype=np.int32) + for i in range(step_range): + with openmp(target_pragma): + with openmp(parallel_pragma): + for j in range(0, len(a), i + 1): + a[j] = i + 1 + return a + + r = test_impl() + np.testing.assert_equal(r, np.array([3,1,2,3,2,1,3,1,2,3])) + def target_teams(self, device): target_pragma = f"target teams num_teams(100) device({device}) map(from: a, nteams)" @njit