diff --git a/hpat/hiframes/pd_dataframe_ext.py b/hpat/hiframes/pd_dataframe_ext.py
index de678b898..25dd34072 100644
--- a/hpat/hiframes/pd_dataframe_ext.py
+++ b/hpat/hiframes/pd_dataframe_ext.py
@@ -625,38 +625,63 @@ def generic(self, args, kws):
 @overload_method(DataFrameType, 'merge')
 @overload(pd.merge)
 def merge_overload(left, right, how='inner', on=None, left_on=None,
-        right_on=None, left_index=False, right_index=False, sort=False,
-        suffixes=('_x', '_y'), copy=True, indicator=False, validate=None):
+                   right_on=None, left_index=False, right_index=False, sort=False,
+                   suffixes=('_x', '_y'), copy=True, indicator=False, validate=None):
 
-    def _impl(left, right, how='inner', on=None, left_on=None,
-            right_on=None, left_index=False, right_index=False, sort=False,
-            suffixes=('_x', '_y'), copy=True, indicator=False, validate=None):
-        if on is not None:
-            left_on = right_on = on
+    # 'on' can be a list or a string literal, use different overloads for them
+    if on is not None and isinstance(on, types.StringLiteral):
+        # use literal strings value to fetch them as consts from IR later
+        new_right_on = on.literal_value
+        new_left_on = on.literal_value
 
-        return hpat.hiframes.api.join_dummy(
-            left, right, left_on, right_on, how)
+        def _impl(left, right, how='inner', on=None, left_on=None,
+                  right_on=None, left_index=False, right_index=False, sort=False,
+                  suffixes=('_x', '_y'), copy=True, indicator=False, validate=None):
+
+            return hpat.hiframes.api.join_dummy(left, right, new_left_on, new_right_on, how)
+    else:
+        def _impl(left, right, how='inner', on=None, left_on=None,
+                  right_on=None, left_index=False, right_index=False, sort=False,
+                  suffixes=('_x', '_y'), copy=True, indicator=False, validate=None):
+            if on is not None:
+                left_on = right_on = on
+
+            return hpat.hiframes.api.join_dummy(left, right, left_on, right_on, how)
 
     return _impl
+
+
 @overload(pd.merge_asof)
 def merge_asof_overload(left, right, on=None, left_on=None, right_on=None,
-        left_index=False, right_index=False, by=None, left_by=None,
-        right_by=None, suffixes=('_x', '_y'), tolerance=None,
-        allow_exact_matches=True, direction='backward'):
-
-    def _impl(left, right, on=None, left_on=None, right_on=None,
-            left_index=False, right_index=False, by=None, left_by=None,
-            right_by=None, suffixes=('_x', '_y'), tolerance=None,
-            allow_exact_matches=True, direction='backward'):
-        if on is not None:
-            left_on = right_on = on
+                        left_index=False, right_index=False, by=None, left_by=None,
+                        right_by=None, suffixes=('_x', '_y'), tolerance=None,
+                        allow_exact_matches=True, direction='backward'):
+
+    # 'on' can be a list or a string literal, use different overloads for them
+    if on is not None and isinstance(on, types.StringLiteral):
+        # use literal strings value to fetch them as consts from IR later
+        new_right_on = on.literal_value
+        new_left_on = on.literal_value
+
+        def _impl(left, right, on=None, left_on=None, right_on=None,
+                  left_index=False, right_index=False, by=None, left_by=None,
+                  right_by=None, suffixes=('_x', '_y'), tolerance=None,
+                  allow_exact_matches=True, direction='backward'):
+
+            return hpat.hiframes.api.join_dummy(left, right, new_left_on, new_right_on, 'asof')
+    else:
+        def _impl(left, right, on=None, left_on=None, right_on=None,
+                  left_index=False, right_index=False, by=None, left_by=None,
+                  right_by=None, suffixes=('_x', '_y'), tolerance=None,
+                  allow_exact_matches=True, direction='backward'):
+            if on is not None:
+                left_on = right_on = on
 
-        return hpat.hiframes.api.join_dummy(
-            left, right, left_on, right_on, 'asof')
+            return hpat.hiframes.api.join_dummy(left, right, left_on, right_on, 'asof')
 
     return _impl
+
+
 @overload_method(DataFrameType, 'pivot_table')
 def pivot_table_overload(df, values=None, index=None, columns=None,
         aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All',
diff --git a/hpat/tests/test_join.py b/hpat/tests/test_join.py
index 5f1947a7f..da6241ee7 100644
--- a/hpat/tests/test_join.py
+++ b/hpat/tests/test_join.py
@@ -169,8 +169,6 @@ def test_impl(A1, B1, C1, A2, B2, D2):
         self.assertEqual(h_res, p_res)
         self.assertEqual(count_array_OneDs(), 3)
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_datetime_seq1(self):
         def test_impl(df1, df2):
             return pd.merge(df1, df2, on='time')
@@ -184,8 +182,6 @@ def test_impl(df1, df2):
             ['2017-01-01', '2017-01-06', '2017-01-03']), 'A': [7, 8, 9]})
         pd.testing.assert_frame_equal(hpat_func(df1, df2), test_impl(df1, df2))
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_datetime_parallel1(self):
         def test_impl(df1, df2):
             df3 = pd.merge(df1, df2, on='time')
@@ -206,8 +202,6 @@ def test_impl(df1, df2):
         self.assertEqual(count_array_REPs(), 0)
         self.assertEqual(count_parfor_REPs(), 0)
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_merge_asof_seq1(self):
         def test_impl(df1, df2):
             return pd.merge_asof(df1, df2, on='time')
@@ -222,8 +216,6 @@ def test_impl(df1, df2):
             '2017-02-25']), 'A': [2,3,7,8,9]})
         pd.testing.assert_frame_equal(hpat_func(df1, df2), test_impl(df1, df2))
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_merge_asof_parallel1(self):
         def test_impl():
             df1 = pd.read_parquet('asof1.pq')
@@ -234,8 +226,6 @@ def test_impl():
         hpat_func = hpat.jit(test_impl)
         self.assertEqual(hpat_func(), test_impl())
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_left_seq1(self):
         def test_impl(df1, df2):
             return pd.merge(df1, df2, how='left', on='key')
@@ -253,8 +243,6 @@ def test_impl(df1, df2):
         self.assertEqual(
             set(h_res.B.dropna().values), set(res.B.dropna().values))
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_left_seq2(self):
         def test_impl(df1, df2):
             return pd.merge(df1, df2, how='left', on='key')
@@ -273,8 +261,6 @@ def test_impl(df1, df2):
         self.assertEqual(
             set(h_res.B.dropna().values), set(res.B.dropna().values))
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_right_seq1(self):
         def test_impl(df1, df2):
             return pd.merge(df1, df2, how='right', on='key')
@@ -292,8 +278,6 @@ def test_impl(df1, df2):
         self.assertEqual(
             set(h_res.A.dropna().values), set(res.A.dropna().values))
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_outer_seq1(self):
         def test_impl(df1, df2):
             return pd.merge(df1, df2, how='outer', on='key')
@@ -327,8 +311,6 @@ def test_impl(df1, df2, df3, df4):
         df4 = pd.DataFrame({'B': 2*np.arange(n)+1, 'BBB': n+np.arange(n)+1.0})
         pd.testing.assert_frame_equal(hpat_func(df1, df2, df3, df4)[1], test_impl(df1, df2, df3, df4)[1])
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_cat1(self):
         def test_impl():
             ct_dtype = CategoricalDtype(['A', 'B', 'C'])
@@ -345,8 +327,6 @@ def test_impl():
         hpat_func = hpat.jit(test_impl)
         pd.testing.assert_frame_equal(hpat_func(), test_impl())
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_cat2(self):
         # test setting NaN in categorical array
         def test_impl():
@@ -366,8 +346,6 @@ def test_impl():
             hpat_func().sort_values('C1').reset_index(drop=True),
             test_impl().sort_values('C1').reset_index(drop=True))
 
-    @unittest.skip('ValueError - fix needed\n'
-                   'Failed in hpat mode pipeline (step: typed dataframe pass)\n')
     def test_join_cat_parallel1(self):
         # TODO: cat as keys
         def test_impl():