From e8e904178cf8fd101867c44f917c291818018d7c Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 13 Nov 2024 15:49:05 -0800 Subject: [PATCH 1/5] Allow step to be a non-constant. --- numba/openmp.py | 32 +++++++++++++++++--------------- numba/tests/test_openmp.py | 10 ++++++++++ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/numba/openmp.py b/numba/openmp.py index cd36a9231..4eddf9217 100644 --- a/numba/openmp.py +++ b/numba/openmp.py @@ -3550,9 +3550,7 @@ def _get_loop_kind(func_var, call_table): if len(call) == 0: return False - return call[0] # or call[0] == prange - #or call[0] == 'internal_prange' or call[0] == internal_prange - #$or call[0] == 'pndindex' or call[0] == pndindex) + return call[0] loop = loops[0] entry = list(loop.entries)[0] @@ -3744,19 +3742,16 @@ def _get_loop_kind(func_var, call_table): size_var = range_args[1] try: step = self.func_ir.get_definition(range_args[2]) + # get_definition with return arg.foo if foo is an + # argument instead of simply foo. Fix that here. + if isinstance(step, ir.Arg): + step = ir.Var(loop_index.scope, step.name, inst.loc) except KeyError: raise NotImplementedError( - "Only known step size is supported for prange") - if not isinstance(step, ir.Const): - raise NotImplementedError( - "Only constant step size is supported for prange") - step = step.value -# if step != 1: -# print("unsupported step:", step, type(step)) -# raise NotImplementedError( -# "Only constant step size of 1 is supported for prange") - - #assert(start == 0 or (isinstance(start, ir.Const) and start.value == 0)) + "Only known step size is supported for range") + if isinstance(step, ir.Const): + step = step.value + if config.DEBUG_OPENMP >= 1: print("size_var:", size_var, type(size_var)) @@ -3848,7 +3843,14 @@ def _get_loop_kind(func_var, call_table): detect_step_assign = ir.Assign(ir.Const(0, inst.loc), step_var, inst.loc) after_start.append(detect_step_assign) - step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc) + if isinstance(step, int): + step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc) + elif isinstance(step, ir.Var): + step_assign = ir.Assign(step, step_var, inst.loc) + else: + print("Unsupported step:", step, type(step)) + raise NotImplementedError( + f"Unknown step type that isn't a constant or variable but {type(step)} instead.") scale_var = loop_index.scope.redefine("$scale", inst.loc) fake_iternext = ir.Assign(ir.Const(0, inst.loc), iternext_inst.target, inst.loc) fake_second = ir.Assign(ir.Const(0, inst.loc), pair_second_inst.target, inst.loc) diff --git a/numba/tests/test_openmp.py b/numba/tests/test_openmp.py index 9e2f40e85..0889063cc 100644 --- a/numba/tests/test_openmp.py +++ b/numba/tests/test_openmp.py @@ -612,6 +612,16 @@ def test_impl(N): return a self.check(test_impl, 12) + def test_parallel_for_range_step_arg(self): + def test_impl(N, step): + a = np.zeros(N, dtype=np.int32) + with openmp("parallel for"): + for i in range(0, 10, step): + a[i] = i + 1 + + return a + self.check(test_impl, 12, 2) + def test_parallel_for_range_backward_step(self): def test_impl(N): a = np.zeros(N, dtype=np.int32) From d8047803b96b3cf957ffcdf83dffa72ded95d431 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Fri, 15 Nov 2024 09:27:08 -0800 Subject: [PATCH 2/5] Move previously asserting test to the working section. That tests was flawed and had a 0 step in it so fixed that as well. --- numba/tests/test_openmp.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/numba/tests/test_openmp.py b/numba/tests/test_openmp.py index 0889063cc..44c5474fe 100644 --- a/numba/tests/test_openmp.py +++ b/numba/tests/test_openmp.py @@ -622,6 +622,17 @@ def test_impl(N, step): return a self.check(test_impl, 12, 2) + def test_parallel_for_incremented_step(self): + @njit + def test_impl(v, n): + for i in range(n): + with openmp("parallel for"): + for j in range(0, len(v), i + 1): + v[j] = i + 1 + return v + + self.check(test_impl, np.zeros(100), 3) + def test_parallel_for_range_backward_step(self): def test_impl(N): a = np.zeros(N, dtype=np.int32) @@ -1854,19 +1865,6 @@ def test_impl(): test_impl() self.assertIn("Extra code near line", str(raises.exception)) - def test_parallel_for_incremented_step(self): - @njit - def test_impl(v, n): - for i in range(n): - with openmp("parallel for"): - for j in range(0, len(v), i): - v[j] = i - return v - - with self.assertRaises(NotImplementedError) as raises: - test_impl(np.zeros(100), 3) - self.assertIn("Only constant step", str(raises.exception)) - def test_nonstring_var_omp_statement(self): @njit def test_impl(v): From 8e05cbdf2ad6d34e99e3150b5d2364456b3fcdb1 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Fri, 15 Nov 2024 09:28:46 -0800 Subject: [PATCH 3/5] Add range step var as firstprivate. --- numba/openmp.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/numba/openmp.py b/numba/openmp.py index 4eddf9217..6bd5bf964 100644 --- a/numba/openmp.py +++ b/numba/openmp.py @@ -330,7 +330,7 @@ def arg_to_str(self, x, lowerer, struct_lower=False, var_table=None, gen_copy=Fa elif isinstance(arg_str, lir.instructions.AllocaInstr): decl = arg_str.get_decl() else: - breakpoint() + assert False if struct_lower and isinstance(xtyp, types.npytypes.Array): dm = lowerer.context.data_model_manager.lookup(xtyp) @@ -3742,10 +3742,10 @@ def _get_loop_kind(func_var, call_table): size_var = range_args[1] try: step = self.func_ir.get_definition(range_args[2]) - # get_definition with return arg.foo if foo is an - # argument instead of simply foo. Fix that here. - if isinstance(step, ir.Arg): - step = ir.Var(loop_index.scope, step.name, inst.loc) + # Only use get_definition to get a const if + # available. Otherwise use the variable. + if not isinstance(step, int): + step = range_args[2] except KeyError: raise NotImplementedError( "Only known step size is supported for range") @@ -3847,6 +3847,7 @@ def _get_loop_kind(func_var, call_table): step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc) elif isinstance(step, ir.Var): step_assign = ir.Assign(step, step_var, inst.loc) + start_tags.append(openmp_tag("QUAL.OMP.FIRSTPRIVATE", step.name)) else: print("Unsupported step:", step, type(step)) raise NotImplementedError( @@ -4608,9 +4609,7 @@ def some_data_clause_directive(self, args, start_tags, end_tags, lexer_count, ha end_tags, scope) vars_in_explicit_clauses, explicit_privates, non_user_explicits = self.get_explicit_vars(clauses) - found_loop, blocks_for_io, blocks_in_region, entry_pred, exit_block, inst, size_var, step_var, latest_index, loop_index = prepare_out - assert(found_loop) else: blocks_for_io = self.body_blocks @@ -6365,7 +6364,6 @@ def omp_shared_array(size, dtype): @overload(omp_shared_array, target='cpu', inline='always', prefer_literal=True) def omp_shared_array_overload(size, dtype): - breakpoint() assert isinstance(size, types.IntegerLiteral) def impl(size, dtype): return np.empty(size, dtype=dtype) @@ -6373,7 +6371,6 @@ def impl(size, dtype): @overload(omp_shared_array, target='cuda', inline='always', prefer_literal=True) def omp_shared_array_overload(size, dtype): - breakpoint() assert isinstance(size, types.IntegerLiteral) def impl(size, dtype): return numba_cuda.shared.array(size, dtype) From 409d551d8f52d66fb34077bf297889b9ef4c8ed6 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Fri, 15 Nov 2024 10:03:59 -0800 Subject: [PATCH 4/5] Add arbitrary step support tests for target. --- numba/tests/test_openmp.py | 39 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/numba/tests/test_openmp.py b/numba/tests/test_openmp.py index 44c5474fe..aaac7b9c7 100644 --- a/numba/tests/test_openmp.py +++ b/numba/tests/test_openmp.py @@ -606,7 +606,7 @@ def test_parallel_for_range_step_2(self): def test_impl(N): a = np.zeros(N, dtype=np.int32) with openmp("parallel for"): - for i in range(0, 10, 2): + for i in range(0, len(a), 2): a[i] = i + 1 return a @@ -616,7 +616,7 @@ def test_parallel_for_range_step_arg(self): def test_impl(N, step): a = np.zeros(N, dtype=np.int32) with openmp("parallel for"): - for i in range(0, 10, step): + for i in range(0, len(a), step): a[i] = i + 1 return a @@ -3358,6 +3358,41 @@ def test_impl(): r = test_impl() np.testing.assert_equal(r, np.full(32, 1)) + def target_parallel_for_range_step_arg(self, device): + target_pragma = f"target device({device}) map(tofrom: a)" + parallel_pragma = "parallel for" + N = 10 + step = 2 + @njit + def test_impl(): + a = np.zeros(N, dtype=np.int32) + with openmp(target_pragma): + with openmp(parallel_pragma): + for i in range(0, len(a), step): + a[i] = i + 1 + + return a + r = test_impl() + np.testing.assert_equal(r, np.array([1,0,3,0,5,0,7,0,9,0])) + + def target_parallel_for_incremented_step(self, device): + target_pragma = f"target device({device}) map(tofrom: a)" + parallel_pragma = "parallel for" + N = 10 + step_range = 3 + @njit + def test_impl(): + a = np.zeros(N, dtype=np.int32) + for i in range(step_range): + with openmp(target_pragma): + with openmp(parallel_pragma): + for j in range(0, len(a), i + 1): + a[j] = i + 1 + return a + + r = test_impl() + np.testing.assert_equal(r, np.array([3,1,2,3,2,1,3,1,2,3])) + def target_teams(self, device): target_pragma = f"target teams num_teams(100) device({device}) map(from: a, nteams)" @njit From b4e7e4bd626e6a97561a6b01ca665705c5f86655 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Fri, 15 Nov 2024 16:38:55 -0800 Subject: [PATCH 5/5] Code review fixes. --- numba/openmp.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/numba/openmp.py b/numba/openmp.py index 6bd5bf964..5c31ff5d5 100644 --- a/numba/openmp.py +++ b/numba/openmp.py @@ -330,7 +330,7 @@ def arg_to_str(self, x, lowerer, struct_lower=False, var_table=None, gen_copy=Fa elif isinstance(arg_str, lir.instructions.AllocaInstr): decl = arg_str.get_decl() else: - assert False + assert False, f"Don't know how to get decl string for variable {arg_str} of type {type(arg_str)}" if struct_lower and isinstance(xtyp, types.npytypes.Array): dm = lowerer.context.data_model_manager.lookup(xtyp) @@ -3744,11 +3744,13 @@ def _get_loop_kind(func_var, call_table): step = self.func_ir.get_definition(range_args[2]) # Only use get_definition to get a const if # available. Otherwise use the variable. - if not isinstance(step, int): + if not isinstance(step, (int, ir.Const)): step = range_args[2] except KeyError: - raise NotImplementedError( - "Only known step size is supported for range") + # If there is more than one definition possible for the + # step variable then just use the variable and don't try + # to convert to a const. + step = range_args[2] if isinstance(step, ir.Const): step = step.value