Skip to content

Commit

Permalink
BUG: Datetime values in DataFrame.quantile()
Browse files Browse the repository at this point in the history
Closes pandas-dev#6965

Previously, DataFrame.quantile() returned nonsense for datetime columns.
  • Loading branch information
Tom Augspurger committed May 10, 2014
1 parent c3bebac commit 193f238
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 12 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Expand Up @@ -491,6 +491,7 @@ Bug Fixes
- Bug in ``unstack`` raises ``ValueError`` when ``MultiIndex`` contains ``PeriodIndex`` (:issue:`4342`)
- Bug in ``boxplot`` and ``hist`` draws unnecessary axes (:issue:`6769`)
- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`)
- Bug in ``DataFrame.quantile`` returning nonsensical results for datetime values (:issue:`6965`)

pandas 0.13.1
-------------
Expand Down
44 changes: 32 additions & 12 deletions pandas/core/frame.py
Expand Up @@ -4188,23 +4188,43 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
"""
per = np.asarray(q) * 100

if not com.is_list_like(per):
per = [per]
q = [q]
squeeze = True
else:
squeeze = False

def f(arr, per):
arr = arr.values
if arr.dtype != np.float_:
arr = arr.astype(float)
arr = arr[notnull(arr)]
if len(arr) == 0:
if arr._is_datelike_mixed_type:
values = _values_from_object(arr).view('i8')
else:
values = arr.astype(float)
values = values[notnull(values)]
if len(values) == 0:
return NA
else:
return _quantile(arr, per)
return _quantile(values, per)

data = self._get_numeric_data() if numeric_only else self
if com.is_list_like(per):
from pandas.tools.merge import concat
return concat([data.apply(f, axis=axis, args=(x,)) for x in per],
axis=1, keys=per/100.).T
else:
return data.apply(f, axis=axis, args=(per,))

# need to know which cols are timestamp going in so that we can
# map timestamp over them after getting the quantile.
is_dt_col = data.dtypes.map(com.is_datetime64_dtype)
is_dt_col = is_dt_col[is_dt_col].index

quantiles = [[f(vals, x) for x in per]
for (_, vals) in data.iteritems()]
result = DataFrame(quantiles, index=data._info_axis, columns=q).T
if len(is_dt_col) > 0:
result[is_dt_col] = result[is_dt_col].applymap(lib.Timestamp)
if squeeze:
if result.shape == (1, 1):
result = result.T.iloc[:, 0] # don't want scalar
else:
result = result.T.squeeze()
result.name = None # For groupby, so it can set an index name
return result

def rank(self, axis=0, numeric_only=None, method='average',
na_option='keep', ascending=True, pct=False):
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/test_frame.py
Expand Up @@ -10994,6 +10994,25 @@ def test_quantile_multi(self):
index=[.1, .9])
assert_frame_equal(result, expected)

def test_quantile_datetime(self):
    """Check ``DataFrame.quantile`` on a frame holding a datetime column.

    Covers three cases: the default call (``numeric_only=True``), where
    the datetime column is expected to be excluded; a scalar quantile with
    ``numeric_only=False``; and a list of quantiles with
    ``numeric_only=False``.
    """
    df = DataFrame({'a': pd.to_datetime(['2010', '2011']), 'b': [0, 5]})

    # exclude datetime
    result = df.quantile(.5)
    expected = Series([2.5], index=['b'])
    # Compare before `result`/`expected` are rebound below; otherwise the
    # default (numeric-only) path is computed but never verified.
    assert_series_equal(result, expected)

    # datetime
    result = df.quantile(.5, numeric_only=False)
    expected = Series([Timestamp('2010-07-02 12:00:00'), 2.5],
                      index=['a', 'b'])
    assert_series_equal(result, expected)

    # datetime w/ multi
    result = df.quantile([.5], numeric_only=False)
    expected = DataFrame([[Timestamp('2010-07-02 12:00:00'), 2.5]],
                         index=[.5], columns=['a', 'b'])
    assert_frame_equal(result, expected)

def test_cumsum(self):
self.tsframe.ix[5:10, 0] = nan
self.tsframe.ix[10:15, 1] = nan
Expand Down

0 comments on commit 193f238

Please sign in to comment.