Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions numba/openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def arg_to_str(self, x, lowerer, struct_lower=False, var_table=None, gen_copy=Fa
elif isinstance(arg_str, lir.instructions.AllocaInstr):
decl = arg_str.get_decl()
else:
breakpoint()
assert False, f"Don't know how to get decl string for variable {arg_str} of type {type(arg_str)}"

if struct_lower and isinstance(xtyp, types.npytypes.Array):
dm = lowerer.context.data_model_manager.lookup(xtyp)
Expand Down Expand Up @@ -3550,9 +3550,7 @@ def _get_loop_kind(func_var, call_table):
if len(call) == 0:
return False

return call[0] # or call[0] == prange
#or call[0] == 'internal_prange' or call[0] == internal_prange
#$or call[0] == 'pndindex' or call[0] == pndindex)
return call[0]

loop = loops[0]
entry = list(loop.entries)[0]
Expand Down Expand Up @@ -3744,19 +3742,18 @@ def _get_loop_kind(func_var, call_table):
size_var = range_args[1]
try:
step = self.func_ir.get_definition(range_args[2])
# Only use get_definition to get a const if
# available. Otherwise use the variable.
if not isinstance(step, (int, ir.Const)):
step = range_args[2]
except KeyError:
raise NotImplementedError(
"Only known step size is supported for prange")
if not isinstance(step, ir.Const):
raise NotImplementedError(
"Only constant step size is supported for prange")
step = step.value
# if step != 1:
# print("unsupported step:", step, type(step))
# raise NotImplementedError(
# "Only constant step size of 1 is supported for prange")

#assert(start == 0 or (isinstance(start, ir.Const) and start.value == 0))
# If there is more than one definition possible for the
# step variable then just use the variable and don't try
# to convert to a const.
step = range_args[2]
if isinstance(step, ir.Const):
step = step.value

if config.DEBUG_OPENMP >= 1:
print("size_var:", size_var, type(size_var))

Expand Down Expand Up @@ -3848,7 +3845,15 @@ def _get_loop_kind(func_var, call_table):
detect_step_assign = ir.Assign(ir.Const(0, inst.loc), step_var, inst.loc)
after_start.append(detect_step_assign)

step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc)
if isinstance(step, int):
step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc)
elif isinstance(step, ir.Var):
step_assign = ir.Assign(step, step_var, inst.loc)
start_tags.append(openmp_tag("QUAL.OMP.FIRSTPRIVATE", step.name))
else:
print("Unsupported step:", step, type(step))
raise NotImplementedError(
f"Unknown step type that isn't a constant or variable but {type(step)} instead.")
scale_var = loop_index.scope.redefine("$scale", inst.loc)
fake_iternext = ir.Assign(ir.Const(0, inst.loc), iternext_inst.target, inst.loc)
fake_second = ir.Assign(ir.Const(0, inst.loc), pair_second_inst.target, inst.loc)
Expand Down Expand Up @@ -4606,9 +4611,7 @@ def some_data_clause_directive(self, args, start_tags, end_tags, lexer_count, ha
end_tags,
scope)
vars_in_explicit_clauses, explicit_privates, non_user_explicits = self.get_explicit_vars(clauses)

found_loop, blocks_for_io, blocks_in_region, entry_pred, exit_block, inst, size_var, step_var, latest_index, loop_index = prepare_out

assert(found_loop)
else:
blocks_for_io = self.body_blocks
Expand Down Expand Up @@ -6363,15 +6366,13 @@ def omp_shared_array(size, dtype):

@overload(omp_shared_array, target='cpu', inline='always', prefer_literal=True)
def omp_shared_array_overload(size, dtype):
    """CPU-target overload of ``omp_shared_array``.

    Asserts that ``size`` is typed as an integer literal and returns the
    implementation, which builds the array with ``np.empty``.
    """
    # The size has to be known at typing time, hence the literal check.
    assert isinstance(size, types.IntegerLiteral)

    def impl(size, dtype):
        return np.empty(size, dtype=dtype)

    return impl

@overload(omp_shared_array, target='cuda', inline='always', prefer_literal=True)
def omp_shared_array_overload(size, dtype):
breakpoint()
assert isinstance(size, types.IntegerLiteral)
def impl(size, dtype):
return numba_cuda.shared.array(size, dtype)
Expand Down
71 changes: 57 additions & 14 deletions numba/tests/test_openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,12 +606,33 @@ def test_parallel_for_range_step_2(self):
def test_impl(N):
a = np.zeros(N, dtype=np.int32)
with openmp("parallel for"):
for i in range(0, 10, 2):
for i in range(0, len(a), 2):
a[i] = i + 1

return a
self.check(test_impl, 12)

def test_parallel_for_range_step_arg(self):
    """Check a ``parallel for`` whose range step is a function argument."""
    def test_impl(N, step):
        out = np.zeros(N, dtype=np.int32)
        with openmp("parallel for"):
            for idx in range(0, len(out), step):
                out[idx] = idx + 1

        return out

    # 12 elements, visiting every second one.
    n_elems = 12
    stride = 2
    self.check(test_impl, n_elems, stride)

def test_parallel_for_incremented_step(self):
    """Check a ``parallel for`` whose step changes on every outer iteration."""
    @njit
    def test_impl(v, n):
        # The inner loop's step is ``outer + 1``, so it is 1, 2, ..., n
        # across successive outer iterations.
        for outer in range(n):
            with openmp("parallel for"):
                for pos in range(0, len(v), outer + 1):
                    v[pos] = outer + 1
        return v

    data = np.zeros(100)
    self.check(test_impl, data, 3)

def test_parallel_for_range_backward_step(self):
def test_impl(N):
a = np.zeros(N, dtype=np.int32)
Expand Down Expand Up @@ -1844,19 +1865,6 @@ def test_impl():
test_impl()
self.assertIn("Extra code near line", str(raises.exception))

def test_parallel_for_incremented_step(self):
    """Expect ``NotImplementedError`` when the range step is a loop variable."""
    @njit
    def test_impl(v, n):
        for outer in range(n):
            with openmp("parallel for"):
                for pos in range(0, len(v), outer):
                    v[pos] = outer
        return v

    data = np.zeros(100)
    with self.assertRaises(NotImplementedError) as raises:
        test_impl(data, 3)
    # Inspect the message only after the context manager has captured it.
    message = str(raises.exception)
    self.assertIn("Only constant step", message)

def test_nonstring_var_omp_statement(self):
@njit
def test_impl(v):
Expand Down Expand Up @@ -3350,6 +3358,41 @@ def test_impl():
r = test_impl()
np.testing.assert_equal(r, np.full(32, 1))

def target_parallel_for_range_step_arg(self, device):
    """Run a stepped ``parallel for`` nested in a ``target`` region on ``device``.

    The step comes from an enclosing-scope variable; only every second
    element of the result is expected to be written.
    """
    N = 10
    step = 2
    parallel_pragma = "parallel for"
    # The map clause names the array ``a``, so that local must keep its name.
    target_pragma = f"target device({device}) map(tofrom: a)"

    @njit
    def test_impl():
        a = np.zeros(N, dtype=np.int32)
        with openmp(target_pragma):
            with openmp(parallel_pragma):
                for idx in range(0, len(a), step):
                    a[idx] = idx + 1

        return a

    result = test_impl()
    expected = np.array([1, 0, 3, 0, 5, 0, 7, 0, 9, 0])
    np.testing.assert_equal(result, expected)

def target_parallel_for_incremented_step(self, device):
    """Run ``target`` + ``parallel for`` on ``device`` with a step that grows per outer pass.

    Each outer iteration writes ``outer + 1`` at every ``outer + 1``-th
    position, so later passes overwrite some of the earlier values.
    """
    N = 10
    step_range = 3
    parallel_pragma = "parallel for"
    # The map clause names the array ``a``, so that local must keep its name.
    target_pragma = f"target device({device}) map(tofrom: a)"

    @njit
    def test_impl():
        a = np.zeros(N, dtype=np.int32)
        for outer in range(step_range):
            with openmp(target_pragma):
                with openmp(parallel_pragma):
                    for pos in range(0, len(a), outer + 1):
                        a[pos] = outer + 1
        return a

    result = test_impl()
    expected = np.array([3, 1, 2, 3, 2, 1, 3, 1, 2, 3])
    np.testing.assert_equal(result, expected)

def target_teams(self, device):
target_pragma = f"target teams num_teams(100) device({device}) map(from: a, nteams)"
@njit
Expand Down
Loading