diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 6ee22b3ba..a38d96114 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -925,8 +925,8 @@ def sdc_pandas_dataframe_drop_impl(df, labels=None, axis=0, index=None, columns= column_list.append((f'new_col_{column}_data_df', column)) data = ', '.join(f'"{column_name}": {column}' for column, column_name in column_list) - # TODO: Handle index - func_text.append(f"return pandas.DataFrame({{{data}}})\n") + index = 'df.index' + func_text.append(f"return pandas.DataFrame({{{data}}}, index={index})\n") func_definition.extend([indent + func_line for func_line in func_text]) func_def = '\n'.join(func_definition) @@ -977,7 +977,7 @@ def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None, ************************************************* Pandas DataFrame method :meth:`pandas.DataFrame.drop` implementation. .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_drop* + Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_drop* Parameters ----------- df: :obj:`pandas.DataFrame` diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index 5008e8d4d..9cc394018 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -1148,42 +1148,112 @@ def test_impl(df): h_out = hpat_func(df) pd.testing.assert_frame_equal(out, h_out) - def test_df_drop_one_column(self): + def test_df_drop_one_column_unboxing(self): def test_impl(df): return df.drop(columns='A') - df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = self.jit(test_impl) - pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + None, + ['a', 'b', 'c', 'd']] + + sdc_func = self.jit(test_impl) + + for index in index_to_test: + with self.subTest(index=index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + pd.testing.assert_frame_equal(sdc_func(df), test_impl(df)) + + def test_df_drop_one_column(self): + def test_impl(index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + return df.drop(columns='A') + + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + ['a', 'b', 'c', 'd']] + + sdc_func = self.jit(test_impl) + + for index in index_to_test: + with self.subTest(index=index): + pd.testing.assert_frame_equal(sdc_func(index), test_impl(index)) + + def test_df_drop_tuple_column_unboxing(self): + def gen_test_impl(do_jit=False): + def test_impl(df): + if do_jit == True: # noqa + return df.drop(columns=('A', 'C')) + else: + return df.drop(columns=['A', 'C']) + + return test_impl + + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + None, + ['a', 'b', 'c', 'd']] + + test_impl = gen_test_impl() + sdc_func = self.jit(gen_test_impl(do_jit=True)) + + for index in index_to_test: + with self.subTest(index=index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + pd.testing.assert_frame_equal(sdc_func(df), test_impl(df)) - @skip_sdc_jit def test_df_drop_tuple_column(self): - # Pandas supports only list as a parameter - def test_impl(df): - return df.drop(columns=['A', 'B']) + def gen_test_impl(do_jit=False): + def test_impl(index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + if do_jit == True: # noqa + return df.drop(columns=('A', 'C')) + else: + return df.drop(columns=['A', 'C']) - # Numba supports only tuple iteration - def test_sdc_impl(df): - return df.drop(columns=('A', 'B')) + return test_impl - df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = self.jit(test_sdc_impl) - pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + ['a', 'b', 'c', 'd']] + + test_impl = gen_test_impl() + sdc_func = self.jit(gen_test_impl(do_jit=True)) - @unittest.skip("Implement Index for DataFrames") + for index in index_to_test: + with self.subTest(index=index): + pd.testing.assert_frame_equal(sdc_func(index), test_impl(index)) + + @unittest.skip("ValueError when return empty dataframe") def test_df_drop_tuple_columns_all(self): - def test_impl(df): - return df.drop(columns=['A', 'B', 'C']) + def gen_test_impl(do_jit=False): + def test_impl(df): + if do_jit == True: # noqa + return df.drop(columns=('A', 'B', 'C')) + else: + return df.drop(columns=['A', 'B', 'C']) - # Numba supports only tuple iteration - def test_sdc_impl(df): - return df.drop(columns=('A', 'B', 'C')) + return test_impl - df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = self.jit(test_sdc_impl) - pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + None, + ['a', 'b', 'c', 'd']] + + test_impl = gen_test_impl() + sdc_func = self.jit(gen_test_impl(do_jit=True)) + + for index in index_to_test: + with self.subTest(index=index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + + pd.testing.assert_frame_equal(sdc_func(df), test_impl(df)) - @skip_sdc_jit def test_df_drop_by_column_errors_ignore(self): def test_impl(df): return df.drop(columns='M', errors='ignore')