Skip to content

Commit

Permalink
BUG: Fix .groupby(categorical, sort=False) failing
Browse files Browse the repository at this point in the history
  • Loading branch information
kernc committed Feb 18, 2017
1 parent 29aeffb commit 0c550e6
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ Bug Fixes

- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`)

- Bug in ``.groupby`` where ``.groupby(categorical, sort=False)`` would raise ``ValueError`` due to non-matching categories (:issue:`13179`)



Expand Down
5 changes: 5 additions & 0 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2315,6 +2315,11 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
# groupby
else:
cat = self.grouper.unique()
all_categories = self.grouper.categories
cat.add_categories(
all_categories[
~all_categories.isin(cat.categories)],
inplace=True) # GH-13179
self.grouper = self.grouper.reorder_categories(
cat.categories)

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,30 @@ def test_groupby_multi_categorical_as_index(self):

tm.assert_frame_equal(result, expected, check_index_type=True)

def test_groupby_preserve_categories(self):
    """Check the result index when grouping by a categorical column.

    The column holds only 'b' and 'a' while its declared categories are
    'a', 'b' and 'c'; the assertions below pin the expected result index
    for both ``sort=True`` and ``sort=False``.
    """
    # GH-13179
    labels = list('abc')

    # ordered=True: the same expected index is asserted for both sort modes
    ordered_values = pd.Categorical(list('ba'),
                                    categories=labels,
                                    ordered=True)
    frame = DataFrame({'A': ordered_values})
    expected = pd.CategoricalIndex(labels, labels, ordered=True)
    for sort_flag in (True, False):
        result = frame.groupby('A', sort=sort_flag).first().index
        tm.assert_index_equal(result, expected)

    # ordered=False: sort=True expects the declared category order, while
    # sort=False expects 'b', 'a' first, followed by 'c'
    unordered_values = pd.Categorical(list('ba'),
                                      categories=labels,
                                      ordered=False)
    frame = DataFrame({'A': unordered_values})
    expected_sorted = pd.CategoricalIndex(labels, labels, ordered=False)
    expected_unsorted = pd.CategoricalIndex(list('bac'), list('bac'),
                                            ordered=False)
    cases = ((True, expected_sorted), (False, expected_unsorted))
    for sort_flag, expected in cases:
        result = frame.groupby('A', sort=sort_flag).first().index
        tm.assert_index_equal(result, expected)

def test_groupby_preserve_categorical_dtype(self):
# GH13743, GH13854
df = DataFrame({'A': [1, 2, 1, 1, 2],
Expand Down

0 comments on commit 0c550e6

Please sign in to comment.