sdc/distributed.py: 1 addition & 1 deletion

@@ -1770,7 +1770,7 @@ def _gen_parfor_reductions(self, parfor, namevar_table):
         _, reductions = get_parfor_reductions(
             parfor, parfor.params, self.state.calltypes)

-        for reduce_varname, (init_val, reduce_nodes, _) in reductions.items():
+        for reduce_varname, (init_val, reduce_nodes) in reductions.items():
             reduce_op = guard(self._get_reduce_op, reduce_nodes)
             # TODO: initialize reduction vars (arrays)
             reduce_var = namevar_table[reduce_varname]
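
Note: the updated loop assumes get_parfor_reductions now yields 2-tuples of (init_val, reduce_nodes) per reduction variable. The third element of the old 3-tuples was unused here (bound to _), so dropping it is behavior-preserving in this pass.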

sdc/hiframes/aggregate.py: 1 addition & 1 deletion

@@ -438,7 +438,7 @@ def aggregate_array_analysis(aggregate_node, equiv_set, typemap,
             equiv_set.insert_equiv(col_var, shape)
             post.extend(c_post)
         all_shapes.append(shape[0])
-        equiv_set.define(col_var, {})
+        equiv_set.define(col_var)

     if len(all_shapes) > 1:
         equiv_set.insert_equiv(*all_shapes)
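
The same switch from equiv_set.define(col_var, {}) to the one-argument form recurs in filter.py, join.py and csv_ext.py below, matching the define signature of the numba release this PR targets.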

sdc/hiframes/dataframe_pass.py: 1 addition & 2 deletions

@@ -140,8 +140,7 @@ def run_pass(self):
                 out_nodes = [inst]

                 if isinstance(inst, ir.Assign):
-                    if inst.value in self.state.func_ir._definitions[inst.target.name]:
-                        self.state.func_ir._definitions[inst.target.name].remove(inst.value)
+                    self.state.func_ir._definitions[inst.target.name].remove(inst.value)
                     out_nodes = self._run_assign(inst)
                 elif isinstance(inst, (ir.SetItem, ir.StaticSetItem)):
                     out_nodes = self._run_setitem(inst)
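
With the membership guard gone, list.remove raises ValueError when inst.value is not registered in func_ir._definitions for the target, so a missing definition now surfaces as an exception instead of being skipped silently.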

sdc/hiframes/filter.py: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ def filter_array_analysis(filter_node, equiv_set, typemap, array_analysis):
             equiv_set.insert_equiv(col_var, shape)
             post.extend(c_post)
         all_shapes.append(shape[0])
-        equiv_set.define(col_var, {})
+        equiv_set.define(col_var)

     if len(all_shapes) > 1:
         equiv_set.insert_equiv(*all_shapes)

sdc/hiframes/hiframes_typed.py: 15 additions & 5 deletions

@@ -1212,7 +1212,10 @@ def _handle_series_map(self, assign, lhs, rhs, series_var):
         # error checking: make sure there is function input only
         if len(rhs.args) != 1:
             raise ValueError("map expects 1 argument")
-        func = guard(get_definition, self.state.func_ir, rhs.args[0]).value.py_func
+        func = guard(get_definition, self.state.func_ir, rhs.args[0])
+        if func is None or not (isinstance(func, ir.Expr)
+                                and func.op == 'make_function'):
+            raise ValueError("lambda for map not found")

         dtype = self.state.typemap[series_var.name].dtype
         nodes = []
@@ -1379,7 +1382,11 @@ def _handle_series_combine(self, assign, lhs, rhs, series_var):
             raise ValueError("not enough arguments in call to combine")
         if len(rhs.args) > 3:
             raise ValueError("too many arguments in call to combine")
-        func = guard(get_definition, self.state.func_ir, rhs.args[1]).value.py_func
+        func = guard(get_definition, self.state.func_ir, rhs.args[1])
+        if func is None or not (isinstance(func, ir.Expr)
+                                and func.op == 'make_function'):
+            raise ValueError("lambda for combine not found")
+
         out_typ = self.state.typemap[lhs.name].dtype
         other = rhs.args[0]
         nodes = []
@@ -1526,16 +1533,19 @@ def f(arr, w, center): # pragma: no cover
     def _handle_rolling_apply_func(self, func_node, dtype, out_dtype):
         if func_node is None:
             raise ValueError("cannot find kernel function for rolling.apply() call")
-        func_node = func_node.value.py_func
         # TODO: more error checking on the kernel to make sure it doesn't
         # use global/closure variables
+        if func_node.closure is not None:
+            raise ValueError("rolling apply kernel functions cannot have closure variables")
+        if func_node.defaults is not None:
+            raise ValueError("rolling apply kernel functions cannot have default arguments")
         # create a function from the code object
         glbs = self.state.func_ir.func_id.func.__globals__
         lcs = {}
         exec("def f(A): return A", glbs, lcs)
         kernel_func = lcs['f']
-        kernel_func.__code__ = func_node.__code__
-        kernel_func.__name__ = func_node.__code__.co_name
+        kernel_func.__code__ = func_node.code
+        kernel_func.__name__ = func_node.code.co_name
         # use hpat's sequential pipeline to enable pandas operations
         # XXX seq pipeline used since dist pass causes a hang
         m = numba.ir_utils._max_label
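
For orientation, a minimal sketch (helper name hypothetical, not SDC API) of the lookup pattern the three hunks above now share: a lambda argument is expected to resolve to an ir.Expr with op 'make_function', whose code, closure and defaults attributes replace the old .value.py_func function object.

    from numba import ir
    from numba.ir_utils import guard, get_definition

    def _find_make_function(func_ir, var, what="lambda"):
        # resolve the variable to its defining expression, if any
        func_def = guard(get_definition, func_ir, var)
        if func_def is None or not (isinstance(func_def, ir.Expr)
                                    and func_def.op == 'make_function'):
            raise ValueError("{} not found".format(what))
        # callers then use func_def.code, func_def.closure, func_def.defaults
        return func_def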

sdc/hiframes/join.py: 1 addition & 1 deletion

@@ -131,7 +131,7 @@ def join_array_analysis(join_node, equiv_set, typemap, array_analysis):
             equiv_set.insert_equiv(col_var, shape)
             post.extend(c_post)
         all_shapes.append(shape[0])
-        equiv_set.define(col_var, {})
+        equiv_set.define(col_var)

     if len(all_shapes) > 1:
         equiv_set.insert_equiv(*all_shapes)

sdc/hiframes/pd_dataframe_ext.py: 10 additions & 2 deletions

@@ -142,6 +142,10 @@ def resolve_values(self, ary):
     def resolve_apply(self, df, args, kws):
         kws = dict(kws)
         func = args[0] if len(args) > 0 else kws.get('func', None)
+        # check lambda
+        if not isinstance(func, types.MakeFunctionLiteral):
+            raise ValueError("df.apply(): lambda not found")
+
         # check axis
         axis = args[1] if len(args) > 1 else kws.get('axis', None)
         if (axis is None or not isinstance(axis, types.IntegerLiteral)
@@ -161,8 +165,12 @@ def resolve_apply(self, df, args, kws):
             dtypes.append(el_typ)

         row_typ = types.NamedTuple(dtypes, Row)
-        t = func.get_call_type(self.context, (row_typ,), {})
-        return signature(SeriesType(t.return_type), *args)
+        code = func.literal_value.code
+        f_ir = numba.ir_utils.get_ir_of_code({'np': np}, code)
+        _, f_return_type, _ = numba.typed_passes.type_inference_stage(
+            self.context, f_ir, (row_typ,), None)
+
+        return signature(SeriesType(f_return_type), *args)

     @bound_function("df.describe")
     def resolve_describe(self, df, args, kws):
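
A hedged usage sketch of the call shape this typing path covers: an axis=1 apply with an inline lambda (the new MakeFunctionLiteral check requires the lambda to appear literally at the call site). The jit decorator and column names are illustrative.

    import numpy as np
    import pandas as pd
    import sdc

    @sdc.jit
    def weighted(df):
        # each row arrives as a named tuple, so columns are attributes
        return df.apply(lambda r: r.A + 2 * r.B, axis=1)

    res = weighted(pd.DataFrame({'A': np.arange(3.), 'B': np.ones(3)}))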

sdc/hiframes/pd_series_ext.py: 17 additions & 4 deletions

@@ -564,8 +564,18 @@ def _resolve_map_func(self, ary, args, kws):
         # getitem returns Timestamp for dt_index and series(dt64)
         if dtype == types.NPDatetime('ns'):
             dtype = pandas_timestamp_type
-        t = args[0].get_call_type(self.context, (dtype,), {})
-        return signature(SeriesType(t.return_type), *args)
+        code = args[0].literal_value.code
+        _globals = {'np': np}
+        # XXX hack in hiframes_typed to make globals available
+        if hasattr(args[0].literal_value, 'globals'):
+            # TODO: use code.co_names to find globals actually used?
+            _globals = args[0].literal_value.globals
+
+        f_ir = numba.ir_utils.get_ir_of_code(_globals, code)
+        f_typemap, f_return_type, f_calltypes = numba.typed_passes.type_inference_stage(
+            self.context, f_ir, (dtype,), None)
+
+        return signature(SeriesType(f_return_type), *args)

     @bound_function("series.map")
     def resolve_map(self, ary, args, kws):
@@ -584,8 +594,11 @@ def _resolve_combine_func(self, ary, args, kws):
         dtype2 = args[0].dtype
         if dtype2 == types.NPDatetime('ns'):
             dtype2 = pandas_timestamp_type
-        t = args[1].get_call_type(self.context, (dtype1, dtype2,), {})
-        return signature(SeriesType(t.return_type), *args)
+        code = args[1].literal_value.code
+        f_ir = numba.ir_utils.get_ir_of_code({'np': np}, code)
+        f_typemap, f_return_type, f_calltypes = numba.typed_passes.type_inference_stage(
+            self.context, f_ir, (dtype1, dtype2,), None)
+        return signature(SeriesType(f_return_type), *args)

     @bound_function("series.combine")
     def resolve_combine(self, ary, args, kws):
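
Both resolvers above (and resolve_apply in pd_dataframe_ext.py) repeat the same inference dance; a hedged sketch of it as a standalone helper, using only the calls the diff itself relies on (name and signature are illustrative, not SDC API):

    import numpy as np
    import numba

    def infer_lambda_return_type(typingctx, make_function_lit, arg_types,
                                 glbls=None):
        # lower the lambda's code object to numba IR, then run type
        # inference against the given argument types
        code = make_function_lit.literal_value.code
        f_ir = numba.ir_utils.get_ir_of_code(glbls or {'np': np}, code)
        _, f_return_type, _ = numba.typed_passes.type_inference_stage(
            typingctx, f_ir, arg_types, None)
        return f_return_type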

sdc/io/csv_ext.py: 1 addition & 1 deletion

@@ -93,7 +93,7 @@ def csv_array_analysis(csv_node, equiv_set, typemap, array_analysis):
             equiv_set.insert_equiv(col_var, shape)
             post.extend(c_post)
         all_shapes.append(shape[0])
-        equiv_set.define(col_var, {})
+        equiv_set.define(col_var)

     if len(all_shapes) > 1:
         equiv_set.insert_equiv(*all_shapes)

sdc/tests/test_basic.py: 8 additions & 4 deletions

@@ -327,7 +327,8 @@ def test_array_reduce(self):
         self.assertEqual(count_array_OneDs(), 0)
         self.assertEqual(count_parfor_OneDs(), 1)

-    @unittest.expectedFailure # https://github.com/numba/numba/issues/4690
+    @unittest.skipIf(check_numba_version('0.46.0'),
+                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
     def test_dist_return(self):
         def test_impl(N):
             A = np.arange(N)
@@ -344,7 +345,8 @@ def test_impl(N):
         self.assertEqual(count_array_OneDs(), 1)
         self.assertEqual(count_parfor_OneDs(), 1)

-    @unittest.expectedFailure # https://github.com/numba/numba/issues/4690
+    @unittest.skipIf(check_numba_version('0.46.0'),
+                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
     def test_dist_return_tuple(self):
         def test_impl(N):
             A = np.arange(N)
@@ -373,7 +375,8 @@ def test_impl(A):
         np.testing.assert_allclose(hpat_func(arr) / self.num_ranks, test_impl(arr))
         self.assertEqual(count_array_OneDs(), 1)

-    @unittest.expectedFailure # https://github.com/numba/numba/issues/4690
+    @unittest.skipIf(check_numba_version('0.46.0'),
+                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
     def test_rebalance(self):
         def test_impl(N):
             A = np.arange(n)
@@ -391,7 +394,8 @@ def test_impl(N):
         finally:
             sdc.distributed_analysis.auto_rebalance = False

-    @unittest.expectedFailure # https://github.com/numba/numba/issues/4690
+    @unittest.skipIf(check_numba_version('0.46.0'),
+                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
     def test_rebalance_loop(self):
         def test_impl(N):
             A = np.arange(n)
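
check_numba_version itself is not part of this diff; a minimal sketch consistent with how the tests use it, assuming it compares against the installed numba release string:

    import numba

    def check_numba_version(version):
        # True when the installed numba matches the given release exactly
        return numba.__version__ == version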

sdc/tests/test_dataframe.py: 2 additions & 1 deletion

@@ -160,7 +160,8 @@ def test_impl(df):
                 dtype=pd.api.types.CategoricalDtype(['N', 'Y']))})
         pd.testing.assert_frame_equal(hpat_func(df.copy(deep=True)), test_impl(df))

-    @unittest.expectedFailure # https://github.com/numba/numba/issues/4690
+    @unittest.skipIf(check_numba_version('0.46.0'),
+                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
     def test_box_dist_return(self):
         def test_impl(n):
             df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})

sdc/tests/test_ml.py: 2 additions & 1 deletion

@@ -117,7 +117,8 @@ def test_impl(n):
         self.assertEqual(count_array_OneDs(), 1)
         self.assertEqual(count_parfor_OneDs(), 2)

-    @unittest.expectedFailure # https://github.com/numba/numba/issues/4690
+    @unittest.skipIf(check_numba_version('0.46.0'),
+                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
     def test_kmeans(self):
         def test_impl(numCenter, numIter, N, D):
             A = np.ones((N, D))

setup.py: 3 additions & 0 deletions

@@ -202,6 +202,9 @@ def readme():

 str_libs = np_compile_args['libraries']

+if not is_win:
+    str_libs += ['boost_regex']
+
 ext_str = Extension(name="sdc.hstr_ext",
                     sources=["sdc/_str_ext.cpp"],
                     libraries=str_libs,
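
Linking boost_regex on non-Windows platforms means a Boost.Regex development library must be available at build time for the sdc.hstr_ext extension; Windows builds are unaffected by this hunk.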