diff --git a/pandas/core/base.py b/pandas/core/base.py index b8ee50765e0703..8af4b59c4634bd 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -973,10 +973,16 @@ def _ndarray_values(self): def empty(self): return not self.size - def max(self): + def max(self, axis=None, skipna=True): """ Return the maximum value of the Index. + Parameters + ---------- + axis : int, optional + For compatibility with NumPy. Only 0 or None are allowed. + skipna : bool, default True + Returns ------- scalar @@ -1004,22 +1010,36 @@ def max(self): >>> idx.max() ('b', 2) """ - return nanops.nanmax(self.values) + nv.validate_minmax_axis(axis) + return nanops.nanmax(self._values, skipna=skipna) - def argmax(self, axis=None): + def argmax(self, axis=None, skipna=True): """ Return a ndarray of the maximum argument indexer. + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + See Also -------- numpy.ndarray.argmax """ - return nanops.nanargmax(self.values) + nv.validate_minmax_axis(axis) + return nanops.nanargmax(self._values, skipna=skipna) - def min(self): + def min(self, axis=None, skipna=True): """ Return the minimum value of the Index. + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + Returns ------- scalar @@ -1047,17 +1067,25 @@ def min(self): >>> idx.min() ('a', 1) """ - return nanops.nanmin(self.values) + nv.validate_minmax_axis(axis) + return nanops.nanmin(self._values, skipna=skipna) - def argmin(self, axis=None): + def argmin(self, axis=None, skipna=True): """ Return a ndarray of the minimum argument indexer. + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + See Also -------- numpy.ndarray.argmin """ - return nanops.nanargmin(self.values) + nv.validate_minmax_axis(axis) + return nanops.nanargmin(self._values, skipna=skipna) def tolist(self): """ @@ -1110,7 +1138,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, if func is None: raise TypeError("{klass} cannot perform the operation {op}".format( klass=self.__class__.__name__, op=name)) - return func(**kwds) + return func(skipna=skipna, **kwds) def _map_values(self, mapper, na_action=None): """ diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 1d0ea034559492..21ec14ace3e441 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -198,6 +198,8 @@ def _isna_ndarraylike(obj): else: values = obj result = values.isna() + elif isinstance(obj, ABCDatetimeArray): + return obj.isna() elif is_string_dtype(dtype): # Working around NumPy ticket 1542 shape = values.shape diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4dccf4be4edad2..3810f204185fd2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -267,7 +267,7 @@ def tolist(self): """ return list(self.astype(object)) - def min(self, axis=None, *args, **kwargs): + def min(self, axis=None, skipna=True, *args, **kwargs): """ Return the minimum value of the Index or minimum along an axis. @@ -275,27 +275,33 @@ def min(self, axis=None, *args, **kwargs): See Also -------- numpy.ndarray.min + Series.min : Return the minimum value in a Series. """ nv.validate_min(args, kwargs) nv.validate_minmax_axis(axis) - try: - i8 = self.asi8 + if not len(self): + return self._na_value + i8 = self.asi8 + try: # quick check if len(i8) and self.is_monotonic: if i8[0] != iNaT: return self._box_func(i8[0]) if self.hasnans: - min_stamp = self[~self._isnan].asi8.min() + if skipna: + min_stamp = self[~self._isnan].asi8.min() + else: + return self._na_value else: min_stamp = i8.min() return self._box_func(min_stamp) except ValueError: return self._na_value - def argmin(self, axis=None, *args, **kwargs): + def argmin(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the minimum values along an axis. @@ -312,13 +318,13 @@ def argmin(self, axis=None, *args, **kwargs): i8 = self.asi8 if self.hasnans: mask = self._isnan - if mask.all(): + if mask.all() or not skipna: return -1 i8 = i8.copy() i8[mask] = np.iinfo('int64').max return i8.argmin() - def max(self, axis=None, *args, **kwargs): + def max(self, axis=None, skipna=True, *args, **kwargs): """ Return the maximum value of the Index or maximum along an axis. @@ -326,27 +332,33 @@ def max(self, axis=None, *args, **kwargs): See Also -------- numpy.ndarray.max + Series.max : Return the maximum value in a Series. """ nv.validate_max(args, kwargs) nv.validate_minmax_axis(axis) - try: - i8 = self.asi8 + if not len(self): + return self._na_value + i8 = self.asi8 + try: # quick check if len(i8) and self.is_monotonic: if i8[-1] != iNaT: return self._box_func(i8[-1]) if self.hasnans: - max_stamp = self[~self._isnan].asi8.max() + if skipna: + max_stamp = self[~self._isnan].asi8.max() + else: + return self._na_value else: max_stamp = i8.max() return self._box_func(max_stamp) except ValueError: return self._na_value - def argmax(self, axis=None, *args, **kwargs): + def argmax(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the maximum values along an axis. @@ -363,7 +375,7 @@ def argmax(self, axis=None, *args, **kwargs): i8 = self.asi8 if self.hasnans: mask = self._isnan - if mask.all(): + if mask.all() or not skipna: return -1 i8 = i8.copy() i8[mask] = 0 diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0da924de244edb..110c9f4025bd8c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -297,12 +297,14 @@ def _minmax(self, meth): return self._start + self._step * no_steps - def min(self): + def min(self, axis=None, skipna=True): """The minimum value of the RangeIndex""" + nv.validate_minmax_axis(axis) return self._minmax('min') - def max(self): + def max(self, axis=None, skipna=True): """The maximum value of the RangeIndex""" + nv.validate_minmax_axis(axis) return self._minmax('max') def argsort(self, *args, **kwargs): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 027f458614bd8b..f95c133163ddbd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -6,14 +6,14 @@ import numpy as np -from pandas._libs import lib, tslibs +from pandas._libs import iNaT, lib, tslibs import pandas.compat as compat from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.common import ( _get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype, - is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float, - is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, + is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, is_object_dtype, is_scalar, is_timedelta64_dtype) from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna @@ -203,7 +203,15 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, if necessary copy and mask using the specified fill_value copy = True will force the copy """ - values = com.values_from_object(values) + + if is_datetime64tz_dtype(values): + # com.values_from_object returns M8[ns] dtype instead of tz-aware, + # so this case must be handled separately from the rest + dtype = values.dtype + values = getattr(values, "_values", values) + else: + values = com.values_from_object(values) + dtype = values.dtype if mask is None: if isfinite: @@ -211,7 +219,12 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, else: mask = isna(values) - dtype = values.dtype + if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values): + # changing timedelta64/datetime64 to int64 needs to happen after + # finding `mask` above + values = getattr(values, "asi8", values) + values = values.view(np.int64) + dtype_ok = _na_ok_dtype(dtype) # get our fill value (in case we need to provide an alternative @@ -232,8 +245,6 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, elif copy: values = values.copy() - values = _view_if_needed(values) - # return a platform independent precision dtype dtype_max = dtype if is_integer_dtype(dtype) or is_bool_dtype(dtype): @@ -259,21 +270,19 @@ def _na_ok_dtype(dtype): (np.integer, np.timedelta64, np.datetime64)) -def _view_if_needed(values): - if is_datetime_or_timedelta_dtype(values): - return values.view(np.int64) - return values - - def _wrap_results(result, dtype, fill_value=None): """ wrap our results if needed """ - if is_datetime64_dtype(dtype): + if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if fill_value is None: + # GH#24293 + fill_value = iNaT if not isinstance(result, np.ndarray): + tz = getattr(dtype, 'tz', None) assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan - result = tslibs.Timestamp(result) + result = tslibs.Timestamp(result, tz=tz) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -426,7 +435,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): return _wrap_results(the_sum, dtype) -@disallow('M8') @bottleneck_switch() def nanmean(values, axis=None, skipna=True, mask=None): """ @@ -457,7 +465,8 @@ def nanmean(values, axis=None, skipna=True, mask=None): values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 - if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): + if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or + is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)): dtype_sum = np.float64 elif is_float_dtype(dtype): dtype_sum = dtype @@ -466,7 +475,9 @@ def nanmean(values, axis=None, skipna=True, mask=None): the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) if axis is not None and getattr(the_sum, 'ndim', False): - the_mean = the_sum / count + with np.errstate(all="ignore"): + # suppress division by zero warnings + the_mean = the_sum / count ct_mask = count == 0 if ct_mask.any(): the_mean[ct_mask] = np.nan diff --git a/pandas/core/series.py b/pandas/core/series.py index 456578ea17f8a1..762b957dc990e5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -17,9 +17,10 @@ from pandas.core.dtypes.common import ( _is_unorderable_exception, ensure_platform_int, is_bool, - is_categorical_dtype, is_datetime64tz_dtype, is_datetimelike, is_dict_like, - is_extension_array_dtype, is_extension_type, is_hashable, is_integer, - is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) + is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type, + is_hashable, is_integer, is_iterator, is_list_like, is_scalar, + is_string_like, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries) from pandas.core.dtypes.missing import ( @@ -3537,6 +3538,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, # dispatch to ExtensionArray interface if isinstance(delegate, ExtensionArray): return delegate._reduce(name, skipna=skipna, **kwds) + elif is_datetime64_dtype(delegate): + # use DatetimeIndex implementation to handle skipna correctly + delegate = DatetimeIndex(delegate) # dispatch to numpy arrays elif isinstance(delegate, np.ndarray): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 5dbec9490a1157..7cb62c275e6211 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -895,10 +895,18 @@ def test_max_min(self, start, stop, step): result = idx.max() assert result == expected + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.max(skipna=False) + assert result2 == expected + expected = idx._int64index.min() result = idx.min() assert result == expected + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.min(skipna=False) + assert result2 == expected + # empty idx = RangeIndex(start, stop, -step) assert isna(idx.max()) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index e7f984919d80b4..d27308029fa19a 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -56,23 +56,54 @@ def test_nanops(self): # GH#7261 for opname in ['max', 'min']: for klass in [Index, Series]: + arg_op = 'arg' + opname if klass is Index else 'idx' + opname obj = klass([np.nan, 2.0]) assert getattr(obj, opname)() == 2.0 obj = klass([np.nan]) assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) obj = klass([]) assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) obj = klass([pd.NaT, datetime(2011, 11, 1)]) # check DatetimeIndex monotonic path assert getattr(obj, opname)() == datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) # check DatetimeIndex non-monotonic path assert getattr(obj, opname)(), datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + for dtype in ["M8[ns]", "datetime64[ns, UTC]"]: + # cases with empty Series/DatetimeIndex + obj = klass([], dtype=dtype) + + assert getattr(obj, opname)() is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) # argmin/max obj = Index(np.arange(5, dtype='int64')) @@ -82,19 +113,27 @@ def test_nanops(self): obj = Index([np.nan, 1, np.nan, 2]) assert obj.argmin() == 1 assert obj.argmax() == 3 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 obj = Index([np.nan]) assert obj.argmin() == -1 assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), pd.NaT]) assert obj.argmin() == 1 assert obj.argmax() == 2 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 obj = Index([pd.NaT]) assert obj.argmin() == -1 assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 class TestSeriesReductions(object): @@ -290,6 +329,8 @@ def test_empty_timeseries_reductions_return_nat(self): for dtype in ('m8[ns]', 'm8[ns]', 'M8[ns]', 'M8[ns, UTC]'): assert Series([], dtype=dtype).min() is pd.NaT assert Series([], dtype=dtype).max() is pd.NaT + assert Series([], dtype=dtype).min(skipna=False) is pd.NaT + assert Series([], dtype=dtype).max(skipna=False) is pd.NaT def test_numpy_argmin_deprecated(self): # See GH#16830 @@ -558,6 +599,8 @@ def test_minmax_nat_series(self, nat_ser): # GH#23282 assert nat_ser.min() is pd.NaT assert nat_ser.max() is pd.NaT + assert nat_ser.min(skipna=False) is pd.NaT + assert nat_ser.max(skipna=False) is pd.NaT @pytest.mark.parametrize('nat_df', [ pd.DataFrame([pd.NaT, pd.NaT]), @@ -567,6 +610,8 @@ def test_minmax_nat_dataframe(self, nat_df): # GH#23282 assert nat_df.min()[0] is pd.NaT assert nat_df.max()[0] is pd.NaT + assert nat_df.min(skipna=False)[0] is pd.NaT + assert nat_df.max(skipna=False)[0] is pd.NaT def test_min_max(self): rng = pd.date_range('1/1/2000', '12/31/2000') diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 1146e0793d4f5e..11ecd03f6c7e11 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -28,8 +28,8 @@ def _check_stat_op(self, name, alternate, string_series_, # add some NaNs string_series_[5:15] = np.NaN - # idxmax, idxmin, min, and max are valid for dates - if name not in ['max', 'min']: + # mean, idxmax, idxmin, min, and max are valid for dates + if name not in ['max', 'min', 'mean']: ds = Series(pd.date_range('1/1/2001', periods=10)) with pytest.raises(TypeError): f(ds) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index e214d4c1985a97..1e089148114027 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -14,6 +14,7 @@ from pandas import Series, isna from pandas.compat.numpy import _np_version_under1p13 from pandas.core.dtypes.common import is_integer_dtype +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray use_bn = nanops._USE_BOTTLENECK @@ -998,6 +999,23 @@ def prng(self): return np.random.RandomState(1234) +class TestDatetime64NaNOps(object): + @pytest.mark.parametrize('tz', [None, 'UTC']) + def test_nanmean(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + expected = dti[1] + + for obj in [dti, DatetimeArray(dti), Series(dti)]: + result = nanops.nanmean(obj) + assert result == expected + + dti2 = dti.insert(1, pd.NaT) + + for obj in [dti2, DatetimeArray(dti2), Series(dti2)]: + result = nanops.nanmean(obj) + assert result == expected + + def test_use_bottleneck(): if nanops._BOTTLENECK_INSTALLED: