Skip to content

Commit

Permalink
BUG: agg order for list is not maintained (pandas-dev#41017)
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesSchaumont authored and JulianWgs committed Jul 3, 2021
1 parent d33a019 commit 72c0f4e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,7 @@ Other
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`)
- Bug in :meth:`DataFrame.agg` not sorting the aggregated axis in the order of the provided aggregation functions when one or more aggregation functions fail to produce results (:issue:`33634`)
- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)
- Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`)

Expand Down
14 changes: 11 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,12 +376,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
raise ValueError("no results")

try:
return concat(results, keys=keys, axis=1, sort=False)
concatenated = concat(results, keys=keys, axis=1, sort=False)
except TypeError as err:

# we are concatting non-NDFrame objects,
# e.g. a list of scalars

from pandas import Series

result = Series(results, index=keys, name=obj.name)
Expand All @@ -390,6 +388,16 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
"cannot combine transform and aggregation operations"
) from err
return result
else:
# Concat uses the first index to determine the final indexing order.
# The union of a shorter first index with the other indices causes
# the index sorting to be different from the order of the aggregating
# functions. Reindex if this is the case.
index_size = concatenated.index.size
full_ordered_index = next(
result.index for result in results if result.index.size == index_size
)
return concatenated.reindex(full_ordered_index, copy=False)

def agg_dict_like(self) -> FrameOrSeriesUnion:
"""
Expand Down
39 changes: 35 additions & 4 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,10 +1110,9 @@ def test_agg_multiple_mixed_no_warning():
with tm.assert_produces_warning(None):
result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])

# For backwards compatibility, the result's index is
# still sorted by function name, so it's ['min', 'sum']
# not ['sum', 'min'].
expected = expected[["D", "C", "B", "A"]]
# GH33634: the index of the .agg result should follow the order of the
# functions passed to agg (here "sum" first, then "min").
expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
tm.assert_frame_equal(result, expected)


Expand Down Expand Up @@ -1521,6 +1520,38 @@ def test_apply_np_reducer(float_frame, op, how):
tm.assert_series_equal(result, expected)


def test_aggregation_func_column_order():
# GH33634: the index of the .agg result should follow the order of the
# functions passed to agg, including when a custom callable is mixed in
# with the string aliases.
#
# Input frame: "item" holds strings, the three "att*" columns hold integers.
df = DataFrame(
[
("1", 1, 0, 0),
("2", 2, 0, 0),
("3", 3, 0, 0),
("4", 4, 5, 4),
("5", 5, 6, 6),
("6", 6, 7, 7),
],
columns=("item", "att1", "att2", "att3"),
)

# Custom (non-string) aggregator; the expected frame below labels its
# row "foo", i.e. by the function's name.
def foo(s):
return s.sum() / 2

# The callable sits between string aliases so that a re-sorted result index
# would be detected by the comparison below.
aggs = ["sum", foo, "count", "min"]
result = df.agg(aggs)
# Rows are expected in exactly the order of ``aggs``. The "item"/"foo" cell
# is NaN -- presumably because foo cannot halve the concatenated string
# "123456" (TODO confirm against the agg implementation).
expected = DataFrame(
{
"item": ["123456", np.nan, 6, "1"],
"att1": [21.0, 10.5, 6.0, 1.0],
"att2": [18.0, 9.0, 6.0, 0.0],
"att3": [17.0, 8.5, 6.0, 0.0],
},
index=["sum", "foo", "count", "min"],
)
tm.assert_frame_equal(result, expected)


def test_apply_getitem_axis_1():
# GH 13427
df = DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]})
Expand Down

0 comments on commit 72c0f4e

Please sign in to comment.