Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions sdc/datatypes/hpat_pandas_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,8 +790,9 @@ def sdc_pandas_dataframe_drop_impl(df, labels=None, axis=0, index=None, columns=
column_list.append((f'new_col_{column}_data_df', column))

data = ', '.join(f'"{column_name}": {column}' for column, column_name in column_list)
# TODO: Handle index
func_text.append(f"return pandas.DataFrame({{{data}}})\n")
index = 'df.index'

func_text.append(f'return pandas.DataFrame({{{data}}}, index={index})\n')
func_definition.extend([indent + func_line for func_line in func_text])
func_def = '\n'.join(func_definition)

Expand Down Expand Up @@ -842,7 +843,7 @@ def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None,
*************************************************
Pandas DataFrame method :meth:`pandas.DataFrame.drop` implementation.
.. only:: developer
Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_drop*
Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_drop*
Parameters
-----------
df: :obj:`pandas.DataFrame`
Expand Down
13 changes: 7 additions & 6 deletions sdc/hiframes/boxing.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,13 @@ def box_dataframe(typ, val, c):

mod_name = context.insert_const_string(c.builder.module, "pandas")
class_obj = pyapi.import_module_noblock(mod_name)
df_obj = pyapi.call_method(class_obj, "DataFrame", ())

# set df.index if necessary
if typ.index != types.none:
arr_obj = _box_series_data(typ.index.dtype, typ.index, dataframe.index, c)
df_obj = pyapi.call_method(class_obj, "DataFrame", (c.pyapi.make_none(), arr_obj))
else:
df_obj = pyapi.call_method(class_obj, "DataFrame", ())

for i, cname, arr, arr_typ, dtype in zip(range(n_cols), col_names, col_arrs, arr_typs, dtypes):
# df['cname'] = boxed_arr
Expand Down Expand Up @@ -262,11 +268,6 @@ def box_dataframe(typ, val, c):
# pyapi.decref(arr_obj)
pyapi.decref(cname_obj)

# set df.index if necessary
if typ.index != types.none:
arr_obj = _box_series_data(typ.index.dtype, typ.index, dataframe.index, c)
pyapi.object_setattr_string(df_obj, 'index', arr_obj)

pyapi.decref(class_obj)
# pyapi.gil_release(gil_state) # release GIL
return df_obj
Expand Down
94 changes: 78 additions & 16 deletions sdc/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1077,29 +1077,82 @@ def test_impl(df):
h_out = hpat_func(df)
pd.testing.assert_frame_equal(out, h_out)

def test_df_drop_one_column(self):
def test_df_drop_one_column_unboxing(self):
def test_impl(df):
return df.drop(columns='A')

df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]})
hpat_func = self.jit(test_impl)
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
index_to_test = [[1, 2, 3, 4],
[.1, .2, .3, .4],
None,
['a', 'b', 'c', 'd']]

@skip_sdc_jit
def test_df_drop_tuple_column(self):
# Pandas supports only list as a parameter
sdc_func = self.jit(test_impl)

for index in index_to_test:
with self.subTest(index=index):
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
index=index)
pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

def test_df_drop_one_column(self):
    """Drop a single column from a DataFrame that is created inside the tested function.

    The function handed to ``self.jit`` receives only the index values, builds
    the DataFrame itself and returns ``df.drop(columns='A')``. Its result is
    compared against calling the same function directly, once per index in
    the list below (integer, float and string labels).
    """
    def test_impl(index):
        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
                          index=index)
        return df.drop(columns='A')

    sdc_func = self.jit(test_impl)

    # Each index runs as its own subtest so one failing case does not hide the others.
    index_cases = (
        [1, 2, 3, 4],
        [.1, .2, .3, .4],
        ['a', 'b', 'c', 'd'],
    )

    for index in index_cases:
        with self.subTest(index=index):
            actual = sdc_func(index)
            expected = test_impl(index)
            pd.testing.assert_frame_equal(actual, expected)

def test_df_drop_tuple_column_unboxing(self):
def test_impl(df):
return df.drop(columns=['A', 'B'])
return df.drop(columns=['A', 'C'])

# Numba supports only tuple iteration
def test_sdc_impl(df):
return df.drop(columns=('A', 'B'))
return df.drop(columns=('A', 'C'))

df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]})
hpat_func = self.jit(test_sdc_impl)
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
index_to_test = [[1, 2, 3, 4],
[.1, .2, .3, .4],
None,
['a', 'b', 'c', 'd']]

sdc_func = self.jit(test_sdc_impl)

for index in index_to_test:
with self.subTest(index=index):
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
index=index)
pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

def test_df_drop_tuple_column(self):
    """Drop several columns from a DataFrame that is created inside the tested function.

    Two variants of the same function are used: the reference one passes the
    column names as a list, while the one handed to ``self.jit`` passes them
    as a tuple. Both build the DataFrame from the given index and drop columns
    'A' and 'C'; their results are compared once per index in the list below
    (integer, float and string labels).
    """
    def test_impl(index):
        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
                          index=index)
        return df.drop(columns=['A', 'C'])

    # Numba supports only tuple iteration
    def test_sdc_impl(index):
        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
                          index=index)
        return df.drop(columns=('A', 'C'))

    sdc_func = self.jit(test_sdc_impl)

    # Each index runs as its own subtest so one failing case does not hide the others.
    index_cases = (
        [1, 2, 3, 4],
        [.1, .2, .3, .4],
        ['a', 'b', 'c', 'd'],
    )

    for index in index_cases:
        with self.subTest(index=index):
            actual = sdc_func(index)
            expected = test_impl(index)
            pd.testing.assert_frame_equal(actual, expected)

@unittest.skip("Implement Index for DataFrames")
def test_df_drop_tuple_columns_all(self):
def test_impl(df):
return df.drop(columns=['A', 'B', 'C'])
Expand All @@ -1108,9 +1161,18 @@ def test_impl(df):
def test_sdc_impl(df):
return df.drop(columns=('A', 'B', 'C'))

df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]})
hpat_func = self.jit(test_sdc_impl)
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
index_to_test = [[1, 2, 3, 4],
[.1, .2, .3, .4],
None,
['a', 'b', 'c', 'd']]

sdc_func = self.jit(test_sdc_impl)

for index in index_to_test:
with self.subTest(index=index):
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
index=index)
pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

@skip_sdc_jit
def test_df_drop_by_column_errors_ignore(self):
Expand Down