Skip to content

Commit

Permalink
extend docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
xflr6 committed Dec 28, 2016
1 parent fd0f22d commit c8d3ac4
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 2 deletions.
12 changes: 12 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4986,6 +4986,18 @@ def nunique(self, axis=0, dropna=True):
Returns
-------
nunique : Series
Examples
--------
>>> df = DataFrame({'A': [1, 2, 3], 'B': [1, 1, 1]})
>>> df.nunique()
A 3
B 1
dtype: int64
>>> df.nunique(axis=1)
0 1
1 2
2 2
dtype: int64
"""
func = functools.partial(Series.nunique, dropna=dropna)
return self.apply(func, axis=axis)
Expand Down
35 changes: 34 additions & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3901,14 +3901,47 @@ def count(self):

def nunique(self, dropna=True):
"""
Return Series with number of distinct observations per group.
Return DataFrame with number of distinct observations per group for
each column.
.. versionadded:: 0.20.0
Parameters
----------
dropna : boolean, default True
Don't include NaN in the counts.
Returns
-------
nunique : DataFrame
Examples
--------
>>> df = DataFrame({'id': ['spam', 'egg', 'egg', 'spam', 'ham', 'ham'],
... 'value1': [1, 5, 5, 2, 5, 5],
... 'value2': list('abbaxy')})
>>> df
id value1 value2
0 spam 1 a
1 egg 5 b
2 egg 5 b
3 spam 2 a
4 ham 5 x
5 ham 5 y
>>> df.groupby('id').nunique()
id value1 value2
id
egg 1 1 1
ham 1 1 2
spam 1 2 1
>>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any())
id value1 value2
0 spam 1 a
3 spam 2 a
4 ham 5 x
5 ham 5 y
"""
from functools import partial
func = partial(Series.nunique, dropna=dropna)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
MultiIndex, date_range, Timestamp)
import pandas as pd
import pandas.core.nanops as nanops
import pandas.core.algorithms as algorithms
import pandas.formats.printing as printing

import pandas.util.testing as tm
Expand Down Expand Up @@ -411,7 +412,7 @@ def test_count(self):
tm.assert_series_equal(result, expected)

def test_nunique(self):
f = lambda s: len(nanops.unique1d(s.dropna()))
f = lambda s: len(algorithms.unique1d(s.dropna()))
self._check_stat_op('nunique', f, has_skipna=False,
check_dtype=False, check_dates=True)

Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2806,10 +2806,28 @@ def test_nunique(self):
'B': list('abxacc'),
'C': list('abbacx'),
})

expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]})
result = df.groupby('A', as_index=False).nunique()
tm.assert_frame_equal(result, expected)

# as_index
expected.index = list('abc')
expected.index.name = 'A'
result = df.groupby('A').nunique()
tm.assert_frame_equal(result, expected)

# with na
result = df.replace({'x': None}).groupby('A').nunique(dropna=False)
tm.assert_frame_equal(result, expected)

# dropna
expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3},
index=list('abc'))
expected.index.name = 'A'
result = df.replace({'x': None}).groupby('A').nunique()
tm.assert_frame_equal(result, expected)

def test_non_cython_api(self):

# GH5610
Expand Down

0 comments on commit c8d3ac4

Please sign in to comment.