diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 35baec3aa..e901fb316 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -790,8 +790,9 @@ def sdc_pandas_dataframe_drop_impl(df, labels=None, axis=0, index=None, columns= column_list.append((f'new_col_{column}_data_df', column)) data = ', '.join(f'"{column_name}": {column}' for column, column_name in column_list) - # TODO: Handle index - func_text.append(f"return pandas.DataFrame({{{data}}})\n") + index = 'df.index' + + func_text.append(f'return pandas.DataFrame({{{data}}}, index={index})\n') func_definition.extend([indent + func_line for func_line in func_text]) func_def = '\n'.join(func_definition) @@ -842,7 +843,7 @@ def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None, ************************************************* Pandas DataFrame method :meth:`pandas.DataFrame.drop` implementation. .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_drop* + Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_drop* Parameters ----------- df: :obj:`pandas.DataFrame` diff --git a/sdc/hiframes/boxing.py b/sdc/hiframes/boxing.py index 6b2a69113..f0e6c2d35 100644 --- a/sdc/hiframes/boxing.py +++ b/sdc/hiframes/boxing.py @@ -224,7 +224,13 @@ def box_dataframe(typ, val, c): mod_name = context.insert_const_string(c.builder.module, "pandas") class_obj = pyapi.import_module_noblock(mod_name) - df_obj = pyapi.call_method(class_obj, "DataFrame", ()) + + # set df.index if necessary + if typ.index != types.none: + arr_obj = _box_series_data(typ.index.dtype, typ.index, dataframe.index, c) + df_obj = pyapi.call_method(class_obj, "DataFrame", (c.pyapi.make_none(), arr_obj)) + else: + df_obj = pyapi.call_method(class_obj, "DataFrame", ()) for i, cname, arr, arr_typ, dtype in zip(range(n_cols), col_names, col_arrs, arr_typs, dtypes): # df['cname'] = boxed_arr @@ -262,11 +268,6 @@ def box_dataframe(typ, val, c): # pyapi.decref(arr_obj) pyapi.decref(cname_obj) - # set df.index if necessary - if typ.index != types.none: - arr_obj = _box_series_data(typ.index.dtype, typ.index, dataframe.index, c) - pyapi.object_setattr_string(df_obj, 'index', arr_obj) - pyapi.decref(class_obj) # pyapi.gil_release(gil_state) # release GIL return df_obj diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index 66a4f54d9..c575f027e 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -1077,29 +1077,82 @@ def test_impl(df): h_out = hpat_func(df) pd.testing.assert_frame_equal(out, h_out) - def test_df_drop_one_column(self): + def test_df_drop_one_column_unboxing(self): def test_impl(df): return df.drop(columns='A') - df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = self.jit(test_impl) - pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + None, + ['a', 'b', 'c', 'd']] - @skip_sdc_jit - def test_df_drop_tuple_column(self): - # Pandas supports only list as a parameter + sdc_func = self.jit(test_impl) + + for index in index_to_test: + with self.subTest(index=index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + pd.testing.assert_frame_equal(sdc_func(df), test_impl(df)) + + def test_df_drop_one_column(self): + def test_impl(index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + return df.drop(columns='A') + + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + ['a', 'b', 'c', 'd']] + + sdc_func = self.jit(test_impl) + + for index in index_to_test: + with self.subTest(index=index): + pd.testing.assert_frame_equal(sdc_func(index), test_impl(index)) + + def test_df_drop_tuple_column_unboxing(self): def test_impl(df): - return df.drop(columns=['A', 'B']) + return df.drop(columns=['A', 'C']) # Numba supports only tuple iteration def test_sdc_impl(df): - return df.drop(columns=('A', 'B')) + return df.drop(columns=('A', 'C')) - df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = self.jit(test_sdc_impl) - pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + None, + ['a', 'b', 'c', 'd']] + + sdc_func = self.jit(test_sdc_impl) + + for index in index_to_test: + with self.subTest(index=index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + pd.testing.assert_frame_equal(sdc_func(df), test_impl(df)) + + def test_df_drop_tuple_column(self): + def test_impl(index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + return df.drop(columns=['A', 'C']) + + # Numba supports only tuple iteration + def test_sdc_impl(index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + return df.drop(columns=('A', 'C')) + + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + ['a', 'b', 'c', 'd']] + + sdc_func = self.jit(test_sdc_impl) + + for index in index_to_test: + with self.subTest(index=index): + pd.testing.assert_frame_equal(sdc_func(index), test_impl(index)) - @unittest.skip("Implement Index for DataFrames") def test_df_drop_tuple_columns_all(self): def test_impl(df): return df.drop(columns=['A', 'B', 'C']) @@ -1108,9 +1161,18 @@ def test_impl(df): def test_sdc_impl(df): return df.drop(columns=('A', 'B', 'C')) - df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = self.jit(test_sdc_impl) - pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) + index_to_test = [[1, 2, 3, 4], + [.1, .2, .3, .4], + None, + ['a', 'b', 'c', 'd']] + + sdc_func = self.jit(test_sdc_impl) + + for index in index_to_test: + with self.subTest(index=index): + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]}, + index=index) + pd.testing.assert_frame_equal(sdc_func(df), test_impl(df)) @skip_sdc_jit def test_df_drop_by_column_errors_ignore(self):