Skip to content

Commit

Permalink
standardize signature for Index reductions, implement nanmean for datetime64 dtypes (pandas-dev#24293)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 6240255 commit dcf8129
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 46 deletions.
46 changes: 37 additions & 9 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,10 +973,16 @@ def _ndarray_values(self):
def empty(self):
return not self.size

def max(self):
def max(self, axis=None, skipna=True):
"""
Return the maximum value of the Index.
Parameters
----------
axis : int, optional
For compatibility with NumPy. Only 0 or None are allowed.
skipna : bool, default True
Returns
-------
scalar
Expand Down Expand Up @@ -1004,22 +1010,36 @@ def max(self):
>>> idx.max()
('b', 2)
"""
return nanops.nanmax(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanmax(self._values, skipna=skipna)

def argmax(self, axis=None):
def argmax(self, axis=None, skipna=True):
"""
Return a ndarray of the maximum argument indexer.
Parameters
----------
axis : {None}
Dummy argument for consistency with Series
skipna : bool, default True
See Also
--------
numpy.ndarray.argmax
"""
return nanops.nanargmax(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanargmax(self._values, skipna=skipna)

def min(self):
def min(self, axis=None, skipna=True):
"""
Return the minimum value of the Index.
Parameters
----------
axis : {None}
Dummy argument for consistency with Series
skipna : bool, default True
Returns
-------
scalar
Expand Down Expand Up @@ -1047,17 +1067,25 @@ def min(self):
>>> idx.min()
('a', 1)
"""
return nanops.nanmin(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanmin(self._values, skipna=skipna)

def argmin(self, axis=None):
def argmin(self, axis=None, skipna=True):
"""
Return a ndarray of the minimum argument indexer.
Parameters
----------
axis : {None}
Dummy argument for consistency with Series
skipna : bool, default True
See Also
--------
numpy.ndarray.argmin
"""
return nanops.nanargmin(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanargmin(self._values, skipna=skipna)

def tolist(self):
"""
Expand Down Expand Up @@ -1110,7 +1138,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
if func is None:
raise TypeError("{klass} cannot perform the operation {op}".format(
klass=self.__class__.__name__, op=name))
return func(**kwds)
return func(skipna=skipna, **kwds)

def _map_values(self, mapper, na_action=None):
"""
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ def _isna_ndarraylike(obj):
else:
values = obj
result = values.isna()
elif isinstance(obj, ABCDatetimeArray):
return obj.isna()
elif is_string_dtype(dtype):
# Working around NumPy ticket 1542
shape = values.shape
Expand Down
36 changes: 24 additions & 12 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,35 +267,41 @@ def tolist(self):
"""
return list(self.astype(object))

def min(self, axis=None, *args, **kwargs):
def min(self, axis=None, skipna=True, *args, **kwargs):
"""
Return the minimum value of the Index or minimum along
an axis.
See Also
--------
numpy.ndarray.min
Series.min : Return the minimum value in a Series.
"""
nv.validate_min(args, kwargs)
nv.validate_minmax_axis(axis)

try:
i8 = self.asi8
if not len(self):
return self._na_value

i8 = self.asi8
try:
# quick check
if len(i8) and self.is_monotonic:
if i8[0] != iNaT:
return self._box_func(i8[0])

if self.hasnans:
min_stamp = self[~self._isnan].asi8.min()
if skipna:
min_stamp = self[~self._isnan].asi8.min()
else:
return self._na_value
else:
min_stamp = i8.min()
return self._box_func(min_stamp)
except ValueError:
return self._na_value

def argmin(self, axis=None, *args, **kwargs):
def argmin(self, axis=None, skipna=True, *args, **kwargs):
"""
Returns the indices of the minimum values along an axis.
Expand All @@ -312,41 +318,47 @@ def argmin(self, axis=None, *args, **kwargs):
i8 = self.asi8
if self.hasnans:
mask = self._isnan
if mask.all():
if mask.all() or not skipna:
return -1
i8 = i8.copy()
i8[mask] = np.iinfo('int64').max
return i8.argmin()

def max(self, axis=None, *args, **kwargs):
def max(self, axis=None, skipna=True, *args, **kwargs):
"""
Return the maximum value of the Index or maximum along
an axis.
See Also
--------
numpy.ndarray.max
Series.max : Return the maximum value in a Series.
"""
nv.validate_max(args, kwargs)
nv.validate_minmax_axis(axis)

try:
i8 = self.asi8
if not len(self):
return self._na_value

i8 = self.asi8
try:
# quick check
if len(i8) and self.is_monotonic:
if i8[-1] != iNaT:
return self._box_func(i8[-1])

if self.hasnans:
max_stamp = self[~self._isnan].asi8.max()
if skipna:
max_stamp = self[~self._isnan].asi8.max()
else:
return self._na_value
else:
max_stamp = i8.max()
return self._box_func(max_stamp)
except ValueError:
return self._na_value

def argmax(self, axis=None, *args, **kwargs):
def argmax(self, axis=None, skipna=True, *args, **kwargs):
"""
Returns the indices of the maximum values along an axis.
Expand All @@ -363,7 +375,7 @@ def argmax(self, axis=None, *args, **kwargs):
i8 = self.asi8
if self.hasnans:
mask = self._isnan
if mask.all():
if mask.all() or not skipna:
return -1
i8 = i8.copy()
i8[mask] = 0
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,12 +297,14 @@ def _minmax(self, meth):

return self._start + self._step * no_steps

def min(self):
def min(self, axis=None, skipna=True):
"""The minimum value of the RangeIndex"""
nv.validate_minmax_axis(axis)
return self._minmax('min')

def max(self):
def max(self, axis=None, skipna=True):
"""The maximum value of the RangeIndex"""
nv.validate_minmax_axis(axis)
return self._minmax('max')

def argsort(self, *args, **kwargs):
Expand Down
47 changes: 29 additions & 18 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

import numpy as np

from pandas._libs import lib, tslibs
from pandas._libs import iNaT, lib, tslibs
import pandas.compat as compat

from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
from pandas.core.dtypes.common import (
_get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype,
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float,
is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
is_object_dtype, is_scalar, is_timedelta64_dtype)
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

Expand Down Expand Up @@ -203,15 +203,28 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
if necessary copy and mask using the specified fill_value
copy = True will force the copy
"""
values = com.values_from_object(values)

if is_datetime64tz_dtype(values):
# com.values_from_object returns M8[ns] dtype instead of tz-aware,
# so this case must be handled separately from the rest
dtype = values.dtype
values = getattr(values, "_values", values)
else:
values = com.values_from_object(values)
dtype = values.dtype

if mask is None:
if isfinite:
mask = _isfinite(values)
else:
mask = isna(values)

dtype = values.dtype
if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
# changing timedelta64/datetime64 to int64 needs to happen after
# finding `mask` above
values = getattr(values, "asi8", values)
values = values.view(np.int64)

dtype_ok = _na_ok_dtype(dtype)

# get our fill value (in case we need to provide an alternative
Expand All @@ -232,8 +245,6 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
elif copy:
values = values.copy()

values = _view_if_needed(values)

# return a platform independent precision dtype
dtype_max = dtype
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
Expand All @@ -259,21 +270,19 @@ def _na_ok_dtype(dtype):
(np.integer, np.timedelta64, np.datetime64))


def _view_if_needed(values):
if is_datetime_or_timedelta_dtype(values):
return values.view(np.int64)
return values


def _wrap_results(result, dtype, fill_value=None):
""" wrap our results if needed """

if is_datetime64_dtype(dtype):
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
if fill_value is None:
# GH#24293
fill_value = iNaT
if not isinstance(result, np.ndarray):
tz = getattr(dtype, 'tz', None)
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
result = np.nan
result = tslibs.Timestamp(result)
result = tslibs.Timestamp(result, tz=tz)
else:
result = result.view(dtype)
elif is_timedelta64_dtype(dtype):
Expand Down Expand Up @@ -426,7 +435,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
return _wrap_results(the_sum, dtype)


@disallow('M8')
@bottleneck_switch()
def nanmean(values, axis=None, skipna=True, mask=None):
"""
Expand Down Expand Up @@ -457,7 +465,8 @@ def nanmean(values, axis=None, skipna=True, mask=None):
values, skipna, 0, mask=mask)
dtype_sum = dtype_max
dtype_count = np.float64
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype):
if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or
is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)):
dtype_sum = np.float64
elif is_float_dtype(dtype):
dtype_sum = dtype
Expand All @@ -466,7 +475,9 @@ def nanmean(values, axis=None, skipna=True, mask=None):
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

if axis is not None and getattr(the_sum, 'ndim', False):
the_mean = the_sum / count
with np.errstate(all="ignore"):
# suppress division by zero warnings
the_mean = the_sum / count
ct_mask = count == 0
if ct_mask.any():
the_mean[ct_mask] = np.nan
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@

from pandas.core.dtypes.common import (
_is_unorderable_exception, ensure_platform_int, is_bool,
is_categorical_dtype, is_datetime64tz_dtype, is_datetimelike, is_dict_like,
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type,
is_hashable, is_integer, is_iterator, is_list_like, is_scalar,
is_string_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
from pandas.core.dtypes.missing import (
Expand Down Expand Up @@ -3537,6 +3538,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
# dispatch to ExtensionArray interface
if isinstance(delegate, ExtensionArray):
return delegate._reduce(name, skipna=skipna, **kwds)
elif is_datetime64_dtype(delegate):
# use DatetimeIndex implementation to handle skipna correctly
delegate = DatetimeIndex(delegate)

# dispatch to numpy arrays
elif isinstance(delegate, np.ndarray):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -895,10 +895,18 @@ def test_max_min(self, start, stop, step):
result = idx.max()
assert result == expected

# skipna should be irrelevant since RangeIndex should never have NAs
result2 = idx.max(skipna=False)
assert result2 == expected

expected = idx._int64index.min()
result = idx.min()
assert result == expected

# skipna should be irrelevant since RangeIndex should never have NAs
result2 = idx.min(skipna=False)
assert result2 == expected

# empty
idx = RangeIndex(start, stop, -step)
assert isna(idx.max())
Expand Down
Loading

0 comments on commit dcf8129

Please sign in to comment.