Skip to content

Commit

Permalink
COMPAT: Emit warning when groupby by a tuple
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Dec 11, 2017
1 parent 2aa4aa9 commit 209ffce
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 1 deletion.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.22.0.txt
Expand Up @@ -202,6 +202,9 @@ Deprecations
- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`).
- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`).
- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`)
- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated.
In the future, a tuple passed to ``'by'`` will always refer to a single key
that is the actual tuple, instead of treating the tuple as multiple keys (:issue:`18314`)

.. _whatsnew_0220.prior_deprecations:

Expand Down
10 changes: 9 additions & 1 deletion pandas/core/groupby.py
Expand Up @@ -2850,7 +2850,15 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
elif isinstance(key, BaseGrouper):
return key, [], obj

# Everything which is not a list is a key (including tuples):
tuple_as_list = isinstance(key, tuple) and key not in obj
if tuple_as_list:
msg = ("Interpreting tuple 'by' as a list of keys, rather than "
"a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
"future, a tuple will always mean a single key.".format(
list(key), key))
warnings.warn(msg, FutureWarning, stacklevel=5)
key = list(key)

if not isinstance(key, list):
keys = [key]
match_axis_length = False
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Expand Up @@ -2727,6 +2727,23 @@ def test_empty_dataframe_groupby(self):

assert_frame_equal(result, expected)

def test_tuple_warns(self):
# https://github.com/pandas-dev/pandas/issues/18314
df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2],
'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]})
with tm.assert_produces_warning(FutureWarning) as w:
df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()

assert "Interpreting tuple 'by' as a list" in str(w[0].message)

with tm.assert_produces_warning(FutureWarning) as w:
df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()

assert "Interpreting tuple 'by' as a list" in str(w[0].message)

with tm.assert_produces_warning(None):
df.groupby(('a', 'b')).c.mean()


def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
tups = lmap(tuple, df[keys].values)
Expand Down

0 comments on commit 209ffce

Please sign in to comment.