diff --git a/sdc/distributed.py b/sdc/distributed.py index 26ad23cc3..1be684b4e 100644 --- a/sdc/distributed.py +++ b/sdc/distributed.py @@ -1770,7 +1770,7 @@ def _gen_parfor_reductions(self, parfor, namevar_table): _, reductions = get_parfor_reductions( parfor, parfor.params, self.state.calltypes) - for reduce_varname, (init_val, reduce_nodes) in reductions.items(): + for reduce_varname, (init_val, reduce_nodes, _) in reductions.items(): reduce_op = guard(self._get_reduce_op, reduce_nodes) # TODO: initialize reduction vars (arrays) reduce_var = namevar_table[reduce_varname] diff --git a/sdc/hiframes/aggregate.py b/sdc/hiframes/aggregate.py index 161812e46..d6364ebb8 100644 --- a/sdc/hiframes/aggregate.py +++ b/sdc/hiframes/aggregate.py @@ -438,7 +438,7 @@ def aggregate_array_analysis(aggregate_node, equiv_set, typemap, equiv_set.insert_equiv(col_var, shape) post.extend(c_post) all_shapes.append(shape[0]) - equiv_set.define(col_var) + equiv_set.define(col_var, {}) if len(all_shapes) > 1: equiv_set.insert_equiv(*all_shapes) diff --git a/sdc/hiframes/dataframe_pass.py b/sdc/hiframes/dataframe_pass.py index 3ae428e95..0d8cc31c3 100644 --- a/sdc/hiframes/dataframe_pass.py +++ b/sdc/hiframes/dataframe_pass.py @@ -140,7 +140,8 @@ def run_pass(self): out_nodes = [inst] if isinstance(inst, ir.Assign): - self.state.func_ir._definitions[inst.target.name].remove(inst.value) + if inst.value in self.state.func_ir._definitions[inst.target.name]: + self.state.func_ir._definitions[inst.target.name].remove(inst.value) out_nodes = self._run_assign(inst) elif isinstance(inst, (ir.SetItem, ir.StaticSetItem)): out_nodes = self._run_setitem(inst) diff --git a/sdc/hiframes/filter.py b/sdc/hiframes/filter.py index 3db62ff79..fe3d143e7 100644 --- a/sdc/hiframes/filter.py +++ b/sdc/hiframes/filter.py @@ -100,7 +100,7 @@ def filter_array_analysis(filter_node, equiv_set, typemap, array_analysis): equiv_set.insert_equiv(col_var, shape) post.extend(c_post) all_shapes.append(shape[0]) - equiv_set.define(col_var) + equiv_set.define(col_var, {}) if len(all_shapes) > 1: equiv_set.insert_equiv(*all_shapes) diff --git a/sdc/hiframes/hiframes_typed.py b/sdc/hiframes/hiframes_typed.py index 19e6c6f05..bede365ea 100644 --- a/sdc/hiframes/hiframes_typed.py +++ b/sdc/hiframes/hiframes_typed.py @@ -1212,10 +1212,7 @@ def _handle_series_map(self, assign, lhs, rhs, series_var): # error checking: make sure there is function input only if len(rhs.args) != 1: raise ValueError("map expects 1 argument") - func = guard(get_definition, self.state.func_ir, rhs.args[0]) - if func is None or not (isinstance(func, ir.Expr) - and func.op == 'make_function'): - raise ValueError("lambda for map not found") + func = guard(get_definition, self.state.func_ir, rhs.args[0]).value.py_func dtype = self.state.typemap[series_var.name].dtype nodes = [] @@ -1382,11 +1379,7 @@ def _handle_series_combine(self, assign, lhs, rhs, series_var): raise ValueError("not enough arguments in call to combine") if len(rhs.args) > 3: raise ValueError("too many arguments in call to combine") - func = guard(get_definition, self.state.func_ir, rhs.args[1]) - if func is None or not (isinstance(func, ir.Expr) - and func.op == 'make_function'): - raise ValueError("lambda for combine not found") - + func = guard(get_definition, self.state.func_ir, rhs.args[1]).value.py_func out_typ = self.state.typemap[lhs.name].dtype other = rhs.args[0] nodes = [] @@ -1533,19 +1526,16 @@ def f(arr, w, center): # pragma: no cover def _handle_rolling_apply_func(self, func_node, dtype, out_dtype): if func_node is None: raise ValueError("cannot find kernel function for rolling.apply() call") + func_node = func_node.value.py_func # TODO: more error checking on the kernel to make sure it doesn't # use global/closure variables - if func_node.closure is not None: - raise ValueError("rolling apply kernel functions cannot have closure variables") - if func_node.defaults is not None: - raise ValueError("rolling apply kernel functions cannot have default arguments") # create a function from the code object glbs = self.state.func_ir.func_id.func.__globals__ lcs = {} exec("def f(A): return A", glbs, lcs) kernel_func = lcs['f'] - kernel_func.__code__ = func_node.code - kernel_func.__name__ = func_node.code.co_name + kernel_func.__code__ = func_node.__code__ + kernel_func.__name__ = func_node.__code__.co_name # use hpat's sequential pipeline to enable pandas operations # XXX seq pipeline used since dist pass causes a hang m = numba.ir_utils._max_label diff --git a/sdc/hiframes/join.py b/sdc/hiframes/join.py index 246e0a176..540d3fd7d 100644 --- a/sdc/hiframes/join.py +++ b/sdc/hiframes/join.py @@ -131,7 +131,7 @@ def join_array_analysis(join_node, equiv_set, typemap, array_analysis): equiv_set.insert_equiv(col_var, shape) post.extend(c_post) all_shapes.append(shape[0]) - equiv_set.define(col_var) + equiv_set.define(col_var, {}) if len(all_shapes) > 1: equiv_set.insert_equiv(*all_shapes) diff --git a/sdc/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py index 61e988095..27bf593b0 100644 --- a/sdc/hiframes/pd_dataframe_ext.py +++ b/sdc/hiframes/pd_dataframe_ext.py @@ -142,10 +142,6 @@ def resolve_values(self, ary): def resolve_apply(self, df, args, kws): kws = dict(kws) func = args[0] if len(args) > 0 else kws.get('func', None) - # check lambda - if not isinstance(func, types.MakeFunctionLiteral): - raise ValueError("df.apply(): lambda not found") - # check axis axis = args[1] if len(args) > 1 else kws.get('axis', None) if (axis is None or not isinstance(axis, types.IntegerLiteral) @@ -165,12 +161,8 @@ def resolve_apply(self, df, args, kws): dtypes.append(el_typ) row_typ = types.NamedTuple(dtypes, Row) - code = func.literal_value.code - f_ir = numba.ir_utils.get_ir_of_code({'np': np}, code) - _, f_return_type, _ = numba.typed_passes.type_inference_stage( - self.context, f_ir, (row_typ,), None) - - return signature(SeriesType(f_return_type), *args) + t = func.get_call_type(self.context, (row_typ,), {}) + return signature(SeriesType(t.return_type), *args) @bound_function("df.describe") def resolve_describe(self, df, args, kws): diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py index 7366036d8..c85614b2e 100644 --- a/sdc/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -564,18 +564,8 @@ def _resolve_map_func(self, ary, args, kws): # getitem returns Timestamp for dt_index and series(dt64) if dtype == types.NPDatetime('ns'): dtype = pandas_timestamp_type - code = args[0].literal_value.code - _globals = {'np': np} - # XXX hack in hiframes_typed to make globals available - if hasattr(args[0].literal_value, 'globals'): - # TODO: use code.co_names to find globals actually used? - _globals = args[0].literal_value.globals - - f_ir = numba.ir_utils.get_ir_of_code(_globals, code) - f_typemap, f_return_type, f_calltypes = numba.typed_passes.type_inference_stage( - self.context, f_ir, (dtype,), None) - - return signature(SeriesType(f_return_type), *args) + t = args[0].get_call_type(self.context, (dtype,), {}) + return signature(SeriesType(t.return_type), *args) @bound_function("series.map") def resolve_map(self, ary, args, kws): @@ -594,11 +584,8 @@ def _resolve_combine_func(self, ary, args, kws): dtype2 = args[0].dtype if dtype2 == types.NPDatetime('ns'): dtype2 = pandas_timestamp_type - code = args[1].literal_value.code - f_ir = numba.ir_utils.get_ir_of_code({'np': np}, code) - f_typemap, f_return_type, f_calltypes = numba.typed_passes.type_inference_stage( - self.context, f_ir, (dtype1, dtype2,), None) - return signature(SeriesType(f_return_type), *args) + t = args[1].get_call_type(self.context, (dtype1, dtype2,), {}) + return signature(SeriesType(t.return_type), *args) @bound_function("series.combine") def resolve_combine(self, ary, args, kws): diff --git a/sdc/io/csv_ext.py b/sdc/io/csv_ext.py index 292d82204..2765081d9 100644 --- a/sdc/io/csv_ext.py +++ b/sdc/io/csv_ext.py @@ -93,7 +93,7 @@ def csv_array_analysis(csv_node, equiv_set, typemap, array_analysis): equiv_set.insert_equiv(col_var, shape) post.extend(c_post) all_shapes.append(shape[0]) - equiv_set.define(col_var) + equiv_set.define(col_var, {}) if len(all_shapes) > 1: equiv_set.insert_equiv(*all_shapes) diff --git a/sdc/tests/test_basic.py b/sdc/tests/test_basic.py index d89df5012..65537fbef 100644 --- a/sdc/tests/test_basic.py +++ b/sdc/tests/test_basic.py @@ -327,8 +327,7 @@ def test_array_reduce(self): self.assertEqual(count_array_OneDs(), 0) self.assertEqual(count_parfor_OneDs(), 1) - @unittest.skipIf(check_numba_version('0.46.0'), - "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") + @unittest.expectedFailure # https://github.com/numba/numba/issues/4690 def test_dist_return(self): def test_impl(N): A = np.arange(N) @@ -345,8 +344,7 @@ def test_impl(N): self.assertEqual(count_array_OneDs(), 1) self.assertEqual(count_parfor_OneDs(), 1) - @unittest.skipIf(check_numba_version('0.46.0'), - "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") + @unittest.expectedFailure # https://github.com/numba/numba/issues/4690 def test_dist_return_tuple(self): def test_impl(N): A = np.arange(N) @@ -375,8 +373,7 @@ def test_impl(A): np.testing.assert_allclose(hpat_func(arr) / self.num_ranks, test_impl(arr)) self.assertEqual(count_array_OneDs(), 1) - @unittest.skipIf(check_numba_version('0.46.0'), - "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") + @unittest.expectedFailure # https://github.com/numba/numba/issues/4690 def test_rebalance(self): def test_impl(N): A = np.arange(n) @@ -394,8 +391,7 @@ def test_impl(N): finally: sdc.distributed_analysis.auto_rebalance = False - @unittest.skipIf(check_numba_version('0.46.0'), - "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") + @unittest.expectedFailure # https://github.com/numba/numba/issues/4690 def test_rebalance_loop(self): def test_impl(N): A = np.arange(n) diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index cf9ab680f..7614e3a57 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -160,8 +160,7 @@ def test_impl(df): dtype=pd.api.types.CategoricalDtype(['N', 'Y']))}) pd.testing.assert_frame_equal(hpat_func(df.copy(deep=True)), test_impl(df)) - @unittest.skipIf(check_numba_version('0.46.0'), - "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") + @unittest.expectedFailure # https://github.com/numba/numba/issues/4690 def test_box_dist_return(self): def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) diff --git a/sdc/tests/test_ml.py b/sdc/tests/test_ml.py index 40d27d85d..95c2bf5c5 100644 --- a/sdc/tests/test_ml.py +++ b/sdc/tests/test_ml.py @@ -117,8 +117,7 @@ def test_impl(n): self.assertEqual(count_array_OneDs(), 1) self.assertEqual(count_parfor_OneDs(), 2) - @unittest.skipIf(check_numba_version('0.46.0'), - "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") + @unittest.expectedFailure # https://github.com/numba/numba/issues/4690 def test_kmeans(self): def test_impl(numCenter, numIter, N, D): A = np.ones((N, D)) diff --git a/setup.py b/setup.py index 966390040..6e2b606ba 100644 --- a/setup.py +++ b/setup.py @@ -202,9 +202,6 @@ def readme(): str_libs = np_compile_args['libraries'] -if not is_win: - str_libs += ['boost_regex'] - ext_str = Extension(name="sdc.hstr_ext", sources=["sdc/_str_ext.cpp"], libraries=str_libs,