Skip to content

Commit

Permalink
Implement reductions from pandas-dev#24024 (pandas-dev#24484)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 73bdcdc commit f56966a
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 0 deletions.
66 changes: 66 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import nanops
from pandas.core.algorithms import checked_add_with_arr, take, unique1d
import pandas.core.common as com

Expand Down Expand Up @@ -1381,6 +1382,71 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
)
return arg

# --------------------------------------------------------------
# Reductions

def _reduce(self, name, axis=0, skipna=True, **kwargs):
op = getattr(self, name, None)
if op:
return op(axis=axis, skipna=skipna, **kwargs)
else:
raise TypeError("cannot perform {name} with type {dtype}"
.format(name=name, dtype=self.dtype))
# TODO: use super(DatetimeLikeArrayMixin, self)._reduce
# after we subclass ExtensionArray

def min(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the minimum value of the Array or minimum along
    an axis.

    Parameters
    ----------
    axis : optional
        Only checked via ``nv.validate_minmax_axis``; it is not otherwise
        used by the reduction.
    skipna : bool, default True
        Forwarded to ``nanops.nanmin``.
    *args, **kwargs
        Only checked via ``nv.validate_min``.

    Returns
    -------
    scalar
        ``NaT`` when the reduced value is NA, otherwise the reduced
        value passed through ``self._box_func``.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    nv.validate_min(args, kwargs)
    nv.validate_minmax_axis(axis)

    # Reduce over the i8 values, passing the NA mask along explicitly.
    i8values = self.asi8
    na_mask = self.isna()
    lowest = nanops.nanmin(i8values, skipna=skipna, mask=na_mask)

    # Period._from_ordinal does not handle np.nan gracefully, so an NA
    # result is mapped to NaT here instead of being boxed.
    return NaT if isna(lowest) else self._box_func(lowest)

def max(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the maximum value of the Array or maximum along
    an axis.

    Parameters
    ----------
    axis : optional
        Only checked via ``nv.validate_minmax_axis``; it is not otherwise
        used by the reduction.
    skipna : bool, default True
        If True, NA entries are dropped before reducing. If False and
        any entry is NA, ``NaT`` is returned.
    *args, **kwargs
        Only checked via ``nv.validate_max``.

    Returns
    -------
    scalar
        ``NaT`` when there is nothing to reduce (or an NA is present with
        ``skipna=False``), otherwise the maximum passed through
        ``self._box_func``.

    See Also
    --------
    numpy.ndarray.max
    Index.max : Return the maximum value in an Index.
    Series.max : Return the maximum value in a Series.
    """
    # TODO: skipna is broken with max.
    # See https://github.com/pandas-dev/pandas/issues/24265
    nv.validate_max(args, kwargs)
    nv.validate_minmax_axis(axis)

    na_mask = self.isna()
    if skipna:
        # Drop NA entries up front rather than relying on nanmax.
        candidates = self[~na_mask].asi8
    else:
        if na_mask.any():
            # An NA entry dominates when it is not skipped.
            return NaT
        candidates = self.asi8

    if len(candidates) == 0:
        # Short-circuit: nothing left to reduce.
        return NaT

    # No NA went in, so the result cannot be NA and is boxed directly.
    highest = nanops.nanmax(candidates, skipna=skipna)
    return self._box_func(highest)


# Call the `_add_comparison_ops` hook on the mixin class itself.
# NOTE(review): presumably this attaches the comparison operator methods to
# DatetimeLikeArrayMixin once the class body has been defined -- the hook's
# implementation is not visible in this hunk; confirm.
DatetimeLikeArrayMixin._add_comparison_ops()

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,13 @@ def test_scalar_from_string(self):
result = arr._scalar_from_string(str(arr[0]))
assert result == arr[0]

def test_reduce_invalid(self):
    """``_reduce`` raises TypeError for a name that is not a reduction."""
    # Ten consecutive daily steps expressed as i8 nanoseconds.
    nanos_per_day = 24 * 3600 * 10**9
    data = np.arange(10, dtype='i8') * nanos_per_day
    arr = self.array_cls(data, freq='D')

    with pytest.raises(TypeError, match='cannot perform'):
        arr._reduce("not a method")

def test_searchsorted(self):
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
arr = self.array_cls(data, freq='D')
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,41 @@ def test_tz_dtype_matches(self):
result, _, _ = sequence_to_dt64ns(
arr, dtype=DatetimeTZDtype(tz="US/Central"))
tm.assert_numpy_array_equal(arr._data, result)


class TestReductions(object):
    """Check ``min``/``max`` on DatetimeArray, tz-naive and tz-aware."""

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_min_max(self, tz):
        # Unsorted dates containing a duplicate and one missing value.
        dates = [
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ]
        arr = DatetimeArray._from_sequence(dates, tz=tz)

        # Default behaviour: the missing value is skipped.
        earliest = arr.min()
        assert earliest == pd.Timestamp('2000-01-02', tz=tz)

        latest = arr.max()
        assert latest == pd.Timestamp('2000-01-05', tz=tz)

        # Without skipping, the missing value decides the outcome.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna, tz):
        # Reducing an empty array gives NaT for either skipna setting.
        arr = DatetimeArray._from_sequence([], tz=tz)

        assert arr.min(skipna=skipna) is pd.NaT
        assert arr.max(skipna=skipna) is pd.NaT
39 changes: 39 additions & 0 deletions pandas/tests/arrays/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,42 @@ def test_repr_large():
"Length: 1000, dtype: period[D]"
)
assert result == expected


# ----------------------------------------------------------------------------
# Reductions

class TestReductions(object):
    """Check ``min``/``max`` on a daily-frequency period array."""

    def test_min_max(self):
        # Unsorted periods containing a duplicate and one missing value.
        periods = [
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ]
        arr = period_array(periods, freq='D')

        # Default behaviour: the missing value is skipped.
        earliest = arr.min()
        assert earliest == pd.Period('2000-01-02', freq='D')

        latest = arr.max()
        assert latest == pd.Period('2000-01-05', freq='D')

        # Without skipping, the missing value decides the outcome.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # Reducing an empty array gives NaT for either skipna setting.
        arr = period_array([], freq='D')

        assert arr.min(skipna=skipna) is pd.NaT
        assert arr.max(skipna=skipna) is pd.NaT
31 changes: 31 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,34 @@ def test_setitem_clears_freq(self):
a = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H'))
a[0] = pd.Timedelta("1H")
assert a.freq is None


class TestReductions(object):
    """Check ``min``/``max`` on TimedeltaArray."""

    def test_min_max(self):
        # Unsorted durations containing a duplicate and one missing value.
        durations = ['3H', '3H', 'NaT', '2H', '5H', '4H']
        arr = TimedeltaArray._from_sequence(durations)

        # Default behaviour: the missing value is skipped.
        shortest = arr.min()
        assert shortest == pd.Timedelta('2H')

        longest = arr.max()
        assert longest == pd.Timedelta('5H')

        # Without skipping, the missing value decides the outcome.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # Reducing an empty array gives NaT for either skipna setting.
        arr = TimedeltaArray._from_sequence([])

        assert arr.min(skipna=skipna) is pd.NaT
        assert arr.max(skipna=skipna) is pd.NaT

0 comments on commit f56966a

Please sign in to comment.