Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions sdc/datatypes/hpat_pandas_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,8 +925,8 @@ def sdc_pandas_dataframe_drop_impl(df, labels=None, axis=0, index=None, columns=
column_list.append((f'new_col_{column}_data_df', column))

data = ', '.join(f'"{column_name}": {column}' for column, column_name in column_list)
# TODO: Handle index
func_text.append(f"return pandas.DataFrame({{{data}}})\n")
index = 'df.index'
func_text.append(f"return pandas.DataFrame({{{data}}}, index={index})\n")
func_definition.extend([indent + func_line for func_line in func_text])
func_def = '\n'.join(func_definition)

Expand Down Expand Up @@ -977,7 +977,7 @@ def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None,
*************************************************
Pandas DataFrame method :meth:`pandas.DataFrame.drop` implementation.
.. only:: developer
Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_drop*
Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_drop*
Parameters
-----------
df: :obj:`pandas.DataFrame`
Expand Down
118 changes: 94 additions & 24 deletions sdc/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1148,42 +1148,112 @@ def test_impl(df):
h_out = hpat_func(df)
pd.testing.assert_frame_equal(out, h_out)

def test_df_drop_one_column(self):
def test_df_drop_one_column_unboxing(self):
def test_impl(df):
return df.drop(columns='A')

df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]})
hpat_func = self.jit(test_impl)
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
index_to_test = [[1, 2, 3, 4],
[.1, .2, .3, .4],
None,
['a', 'b', 'c', 'd']]

sdc_func = self.jit(test_impl)

for index in index_to_test:
with self.subTest(index=index):
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
index=index)
pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

def test_df_drop_one_column(self):
    """Check ``DataFrame.drop(columns='A')`` on a frame built inside the tested function.

    The DataFrame is constructed from the ``index`` argument within
    ``test_impl`` itself, so the function wrapped by ``self.jit`` receives
    only the index values.  For every index flavour (integers, floats,
    strings) the wrapped result must equal the plain-Python result.
    """
    def test_impl(index):
        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
                          index=index)
        return df.drop(columns='A')

    jitted_impl = self.jit(test_impl)

    # One sub-test per index kind so a failure names the offending index.
    index_cases = [
        [1, 2, 3, 4],
        [.1, .2, .3, .4],
        ['a', 'b', 'c', 'd'],
    ]

    for index in index_cases:
        with self.subTest(index=index):
            actual = jitted_impl(index)
            expected = test_impl(index)
            pd.testing.assert_frame_equal(actual, expected)

def test_df_drop_tuple_column_unboxing(self):
    """Check dropping columns 'A' and 'C' from a DataFrame passed in as an argument.

    ``gen_test_impl`` produces two variants of the same function: the
    default one drops with a list of column names, the ``do_jit=True`` one
    drops with a tuple.  The tuple variant is wrapped with ``self.jit`` and
    its output is compared with the list variant for every index flavour,
    including ``index=None``.
    """
    def gen_test_impl(do_jit=False):
        def test_impl(df):
            # The comparison is spelled out on purpose (hence the noqa);
            # it is kept exactly as written.
            if do_jit == True:  # noqa
                return df.drop(columns=('A', 'C'))
            else:
                return df.drop(columns=['A', 'C'])

        return test_impl

    jitted_impl = self.jit(gen_test_impl(do_jit=True))
    reference_impl = gen_test_impl()

    # One sub-test per index kind so a failure names the offending index.
    index_cases = [
        [1, 2, 3, 4],
        [.1, .2, .3, .4],
        None,
        ['a', 'b', 'c', 'd'],
    ]

    for index in index_cases:
        with self.subTest(index=index):
            frame = pd.DataFrame(
                {'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
                index=index,
            )
            actual = jitted_impl(frame)
            expected = reference_impl(frame)
            pd.testing.assert_frame_equal(actual, expected)

@skip_sdc_jit
def test_df_drop_tuple_column(self):
# Pandas supports only list as a parameter
def test_impl(df):
return df.drop(columns=['A', 'B'])
def gen_test_impl(do_jit=False):
def test_impl(index):
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
index=index)
if do_jit == True: # noqa
return df.drop(columns=('A', 'C'))
else:
return df.drop(columns=['A', 'C'])

# Numba supports only tuple iteration
def test_sdc_impl(df):
return df.drop(columns=('A', 'B'))
return test_impl

df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]})
hpat_func = self.jit(test_sdc_impl)
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
index_to_test = [[1, 2, 3, 4],
[.1, .2, .3, .4],
['a', 'b', 'c', 'd']]

test_impl = gen_test_impl()
sdc_func = self.jit(gen_test_impl(do_jit=True))

@unittest.skip("Implement Index for DataFrames")
for index in index_to_test:
with self.subTest(index=index):
pd.testing.assert_frame_equal(sdc_func(index), test_impl(index))

@unittest.skip("ValueError when return empty dataframe")
def test_df_drop_tuple_columns_all(self):
def test_impl(df):
return df.drop(columns=['A', 'B', 'C'])
def gen_test_impl(do_jit=False):
def test_impl(df):
if do_jit == True: # noqa
return df.drop(columns=('A', 'B', 'C'))
else:
return df.drop(columns=['A', 'B', 'C'])

# Numba supports only tuple iteration
def test_sdc_impl(df):
return df.drop(columns=('A', 'B', 'C'))
return test_impl

df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]})
hpat_func = self.jit(test_sdc_impl)
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
index_to_test = [[1, 2, 3, 4],
[.1, .2, .3, .4],
None,
['a', 'b', 'c', 'd']]

test_impl = gen_test_impl()
sdc_func = self.jit(gen_test_impl(do_jit=True))

for index in index_to_test:
with self.subTest(index=index):
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7], 'C': [1.0, 2.0, np.nan, 1.0]},
index=index)

pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

@skip_sdc_jit
def test_df_drop_by_column_errors_ignore(self):
def test_impl(df):
return df.drop(columns='M', errors='ignore')
Expand Down