diff --git a/numba/openmp.py b/numba/openmp.py
index cd36a9231..5c31ff5d5 100644
--- a/numba/openmp.py
+++ b/numba/openmp.py
@@ -330,7 +330,7 @@ def arg_to_str(self, x, lowerer, struct_lower=False, var_table=None, gen_copy=Fa
                     elif isinstance(arg_str, lir.instructions.AllocaInstr):
                         decl = arg_str.get_decl()
                     else:
-                        breakpoint()
+                        raise AssertionError(f"Don't know how to get decl string for variable {arg_str} of type {type(arg_str)}")  # explicit raise: a bare assert is stripped under -O
 
                 if struct_lower and isinstance(xtyp, types.npytypes.Array):
                     dm = lowerer.context.data_model_manager.lookup(xtyp)
@@ -3550,9 +3550,7 @@ def _get_loop_kind(func_var, call_table):
             if len(call) == 0:
                 return False
 
-            return call[0] # or call[0] == prange
-                    #or call[0] == 'internal_prange' or call[0] == internal_prange
-                    #$or call[0] == 'pndindex' or call[0] == pndindex)
+            return call[0]
 
         loop = loops[0]
         entry = list(loop.entries)[0]
@@ -3744,19 +3742,18 @@ def _get_loop_kind(func_var, call_table):
                         size_var = range_args[1]
                         try:
                             step = self.func_ir.get_definition(range_args[2])
+                            # Only use get_definition to get a const if
+                            # available.  Otherwise use the variable.
+                            if not isinstance(step, (int, ir.Const)):
+                                step = range_args[2]
                         except KeyError:
-                            raise NotImplementedError(
-                                "Only known step size is supported for prange")
-                        if not isinstance(step, ir.Const):
-                            raise NotImplementedError(
-                                "Only constant step size is supported for prange")
-                        step = step.value
-#                        if step != 1:
-#                            print("unsupported step:", step, type(step))
-#                            raise NotImplementedError(
-#                                "Only constant step size of 1 is supported for prange")
-
-                    #assert(start == 0 or (isinstance(start, ir.Const) and start.value == 0))
+                            # If there is more than one definition possible for the
+                            # step variable then just use the variable and don't try
+                            # to convert to a const.
+                            step = range_args[2]
+                        if isinstance(step, ir.Const):
+                            step = step.value
+
                     if config.DEBUG_OPENMP >= 1:
                         print("size_var:", size_var, type(size_var))
 
@@ -3848,7 +3845,15 @@ def _get_loop_kind(func_var, call_table):
                     detect_step_assign = ir.Assign(ir.Const(0, inst.loc), step_var, inst.loc)
                     after_start.append(detect_step_assign)
 
-                    step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc)
+                    if isinstance(step, int):
+                        step_assign = ir.Assign(ir.Const(step, inst.loc), step_var, inst.loc)
+                    elif isinstance(step, ir.Var):
+                        step_assign = ir.Assign(step, step_var, inst.loc)
+                        start_tags.append(openmp_tag("QUAL.OMP.FIRSTPRIVATE", step.name))
+                    else:
+                        # step is neither an int constant nor an ir.Var, so no assignment can be built.
+                        raise NotImplementedError(
+                            f"Unknown step type that isn't a constant or variable but {type(step)} instead.")
                     scale_var = loop_index.scope.redefine("$scale", inst.loc)
                     fake_iternext = ir.Assign(ir.Const(0, inst.loc), iternext_inst.target, inst.loc)
                     fake_second = ir.Assign(ir.Const(0, inst.loc), pair_second_inst.target, inst.loc)
@@ -4606,9 +4611,7 @@ def some_data_clause_directive(self, args, start_tags, end_tags, lexer_count, ha
                                                      end_tags,
                                                      scope)
             vars_in_explicit_clauses, explicit_privates, non_user_explicits = self.get_explicit_vars(clauses)
-
             found_loop, blocks_for_io, blocks_in_region, entry_pred, exit_block, inst, size_var, step_var, latest_index, loop_index = prepare_out
-
             assert(found_loop)
         else:
             blocks_for_io = self.body_blocks
@@ -6363,7 +6366,6 @@ def omp_shared_array(size, dtype):
 
 @overload(omp_shared_array, target='cpu', inline='always', prefer_literal=True)
 def omp_shared_array_overload(size, dtype):
-    breakpoint()
     assert isinstance(size, types.IntegerLiteral)
     def impl(size, dtype):
         return np.empty(size, dtype=dtype)
@@ -6371,7 +6373,6 @@ def impl(size, dtype):
 
 @overload(omp_shared_array, target='cuda', inline='always', prefer_literal=True)
 def omp_shared_array_overload(size, dtype):
-    breakpoint()
     assert isinstance(size, types.IntegerLiteral)
     def impl(size, dtype):
         return numba_cuda.shared.array(size, dtype)
diff --git a/numba/tests/test_openmp.py b/numba/tests/test_openmp.py
index 9e2f40e85..aaac7b9c7 100644
--- a/numba/tests/test_openmp.py
+++ b/numba/tests/test_openmp.py
@@ -606,12 +606,33 @@ def test_parallel_for_range_step_2(self):
         def test_impl(N):
             a = np.zeros(N, dtype=np.int32)
             with openmp("parallel for"):
-                for i in range(0, 10, 2):
+                for i in range(0, len(a), 2):
                     a[i] = i + 1
 
             return a
         self.check(test_impl, 12)
 
+    def test_parallel_for_range_step_arg(self):
+        def test_impl(N, step):
+            a = np.zeros(N, dtype=np.int32)
+            with openmp("parallel for"):
+                for i in range(0, len(a), step):
+                    a[i] = i + 1
+
+            return a
+        self.check(test_impl, 12, 2)
+
+    def test_parallel_for_incremented_step(self):
+        @njit
+        def test_impl(v, n):
+            for i in range(n):
+                with openmp("parallel for"):
+                    for j in range(0, len(v), i + 1):
+                        v[j] = i + 1
+            return v
+
+        self.check(test_impl, np.zeros(100), 3)
+
     def test_parallel_for_range_backward_step(self):
         def test_impl(N):
             a = np.zeros(N, dtype=np.int32)
@@ -1844,19 +1865,6 @@ def test_impl():
             test_impl()
         self.assertIn("Extra code near line", str(raises.exception))
 
-    def test_parallel_for_incremented_step(self):
-        @njit
-        def test_impl(v, n):
-            for i in range(n):
-                with openmp("parallel for"):
-                    for j in range(0, len(v), i):
-                        v[j] = i
-            return v
-
-        with self.assertRaises(NotImplementedError) as raises:
-            test_impl(np.zeros(100), 3)
-        self.assertIn("Only constant step", str(raises.exception))
-
     def test_nonstring_var_omp_statement(self):
         @njit
         def test_impl(v):
@@ -3350,6 +3358,41 @@ def test_impl():
         r = test_impl()
         np.testing.assert_equal(r, np.full(32, 1))
 
+    def target_parallel_for_range_step_arg(self, device):
+        target_pragma = f"target device({device}) map(tofrom: a)"
+        parallel_pragma = "parallel for"
+        N = 10
+        step = 2
+        @njit
+        def test_impl():
+            a = np.zeros(N, dtype=np.int32)
+            with openmp(target_pragma):
+                with openmp(parallel_pragma):
+                    for i in range(0, len(a), step):
+                        a[i] = i + 1
+
+            return a
+        r = test_impl()
+        np.testing.assert_equal(r, np.array([1,0,3,0,5,0,7,0,9,0]))
+
+    def target_parallel_for_incremented_step(self, device):
+        target_pragma = f"target device({device}) map(tofrom: a)"
+        parallel_pragma = "parallel for"
+        N = 10
+        step_range = 3
+        @njit
+        def test_impl():
+            a = np.zeros(N, dtype=np.int32)
+            for i in range(step_range):
+                with openmp(target_pragma):
+                    with openmp(parallel_pragma):
+                        for j in range(0, len(a), i + 1):
+                            a[j] = i + 1
+            return a
+
+        r = test_impl()
+        np.testing.assert_equal(r, np.array([3,1,2,3,2,1,3,1,2,3]))
+
     def target_teams(self, device):
         target_pragma = f"target teams num_teams(100) device({device}) map(from: a, nteams)"
         @njit