From 6244f35f77bd0d19ad64ad8aee4f07a83e7e5534 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 9 Nov 2018 01:22:53 -0800 Subject: [PATCH] TST: Tests and Helpers for Datetime/Period Arrays (#23502) --- pandas/_libs/tslibs/offsets.pyx | 1 + pandas/core/arrays/datetimelike.py | 3 ++ pandas/core/arrays/datetimes.py | 22 ++++++++--- pandas/core/arrays/period.py | 3 -- pandas/core/arrays/timedeltas.py | 2 + pandas/core/dtypes/generic.py | 4 ++ pandas/tests/arithmetic/conftest.py | 19 ++++++++++ pandas/tests/arithmetic/test_datetime64.py | 21 ++++++----- pandas/tests/arithmetic/test_period.py | 10 ++--- pandas/tests/arrays/test_datetimes.py | 43 ++++++++++++++++++++++ pandas/tests/dtypes/test_generic.py | 8 ++++ pandas/util/testing.py | 20 +++++++++- 12 files changed, 132 insertions(+), 24 deletions(-) create mode 100644 pandas/tests/arrays/test_datetimes.py diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8f5887754e40d..f29d995136a81 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -346,6 +346,7 @@ class _BaseOffset(object): def __add__(self, other): if getattr(other, "_typ", None) in ["datetimeindex", "periodindex", + "datetimearray", "periodarray", "series", "period", "dataframe"]: # defer to the other class's implementation return other + self diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7f1c86938a354..ed4309395ac1f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -200,6 +200,9 @@ def astype(self, dtype, copy=True): # ------------------------------------------------------------------ # Null Handling + def isna(self): + return self._isnan + @property # NB: override with cache_readonly in immutable subclasses def _isnan(self): """ return if each value is nan""" diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e7edd54c4177b..39a2c7e75027e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -117,28 +117,36 @@ def wrapper(self, other): return ops.invalid_comparison(self, other, op) else: if isinstance(other, list): - # FIXME: This can break for object-dtype with mixed types - other = type(self)(other) - elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): + try: + other = type(self)(other) + except ValueError: + other = np.array(other, dtype=np.object_) + elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, + DatetimeArrayMixin)): # Following Timestamp convention, __eq__ is all-False # and __ne__ is all True, others raise TypeError. return ops.invalid_comparison(self, other, op) if is_object_dtype(other): result = op(self.astype('O'), np.array(other)) + o_mask = isna(other) elif not (is_datetime64_dtype(other) or is_datetime64tz_dtype(other)): # e.g. is_timedelta64_dtype(other) return ops.invalid_comparison(self, other, op) else: self._assert_tzawareness_compat(other) - result = meth(self, np.asarray(other)) + if not hasattr(other, 'asi8'): + # ndarray, Series + other = type(self)(other) + result = meth(self, other) + o_mask = other._isnan result = com.values_from_object(result) # Make sure to pass an array to result[...]; indexing with # Series breaks with older version of numpy - o_mask = np.array(isna(other)) + o_mask = np.array(o_mask) if o_mask.any(): result[o_mask] = nat_result @@ -157,6 +165,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): _freq _data """ + _typ = "datetimearray" _bool_ops = ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'is_leap_year'] @@ -166,6 +175,9 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): # by returning NotImplemented timetuple = None + # ensure that operations with numpy arrays defer to our implementation + __array_priority__ = 1000 + # ----------------------------------------------------------------- # Constructors diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5a75f2706b218..482968fdb4766 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -403,9 +403,6 @@ def take(self, indices, allow_fill=False, fill_value=None): return type(self)(new_values, self.freq) - def isna(self): - return self._data == iNaT - def fillna(self, value=None, method=None, limit=None): # TODO(#20300) # To avoid converting to object, we re-implement here with the changes diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9653121879c0d..0fd69abd96cfa 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -98,6 +98,8 @@ def wrapper(self, other): class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin): + _typ = "timedeltaarray" + @property def _box_func(self): return lambda x: Timedelta(x, unit='ns') diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index f6926a192a724..7a3ff5d295421 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -53,6 +53,10 @@ def _check(cls, inst): ('sparse_array', 'sparse_series')) ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")) +ABCDatetimeArray = create_pandas_abc_type("ABCDatetimeArray", "_typ", + ("datetimearray")) +ABCTimedeltaArray = create_pandas_abc_type("ABCTimedeltaArray", "_typ", + ("timedeltaarray")) ABCPeriodArray = create_pandas_abc_type("ABCPeriodArray", "_typ", ("periodarray", )) ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index cbe26a06d34c6..cf1abc6f79101 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -5,6 +5,7 @@ import pandas as pd from pandas.compat import long +from pandas.core.arrays import PeriodArray, DatetimeArrayMixin as DatetimeArray @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) @@ -171,3 +172,21 @@ def box_df_broadcast_failure(request): the DataFrame operation tries to broadcast incorrectly. """ return request.param + + +@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, PeriodArray], + ids=lambda x: x.__name__) +def box_with_period(request): + """ + Like `box`, but specific to PeriodDtype for also testing PeriodArray + """ + return request.param + + +@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, DatetimeArray], + ids=lambda x: x.__name__) +def box_with_datetime(request): + """ + Like `box`, but specific to datetime64 for also testing DatetimeArray + """ + return request.param diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 4f1a26ae50c3b..c3ebd8f773aa6 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1037,10 +1037,10 @@ def test_dti_add_sub_float(self, op, other): with pytest.raises(TypeError): op(dti, other) - def test_dti_add_timestamp_raises(self, box): + def test_dti_add_timestamp_raises(self, box_with_datetime): # GH#22163 ensure DataFrame doesn't cast Timestamp to i8 idx = DatetimeIndex(['2011-01-01', '2011-01-02']) - idx = tm.box_expected(idx, box) + idx = tm.box_expected(idx, box_with_datetime) msg = "cannot add" with tm.assert_raises_regex(TypeError, msg): idx + Timestamp('2011-01-01') @@ -1152,16 +1152,17 @@ def test_dti_add_intarray_no_freq(self, box): # ------------------------------------------------------------- # Binary operations DatetimeIndex and timedelta-like - def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, box): + def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, + box_with_datetime): # GH#22005, GH#22163 check DataFrame doesn't raise TypeError tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - rng = tm.box_expected(rng, box) + rng = tm.box_expected(rng, box_with_datetime) result = rng + two_hours expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - expected = tm.box_expected(expected, box) + expected = tm.box_expected(expected, box_with_datetime) tm.assert_equal(result, expected) def test_dti_iadd_timedeltalike(self, tz_naive_fixture, two_hours): @@ -1431,13 +1432,13 @@ def test_sub_dti_dti(self): tm.assert_index_equal(result, expected) @pytest.mark.parametrize('freq', [None, 'D']) - def test_sub_period(self, freq, box): + def test_sub_period(self, freq, box_with_datetime): # GH#13078 # not supported, check TypeError p = pd.Period('2011-01-01', freq='D') idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - idx = tm.box_expected(idx, box) + idx = tm.box_expected(idx, box_with_datetime) with pytest.raises(TypeError): idx - p @@ -1779,7 +1780,7 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names): res3 = dti - other tm.assert_series_equal(res3, expected_sub) - def test_dti_add_offset_tzaware(self, tz_aware_fixture, box): + def test_dti_add_offset_tzaware(self, tz_aware_fixture, box_with_datetime): # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype timezone = tz_aware_fixture if timezone == 'US/Pacific': @@ -1792,8 +1793,8 @@ def test_dti_add_offset_tzaware(self, tz_aware_fixture, box): expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', '2010-11-01 07:00'], freq='H', tz=timezone) - dates = tm.box_expected(dates, box) - expected = tm.box_expected(expected, box) + dates = tm.box_expected(dates, box_with_datetime) + expected = tm.box_expected(expected, box_with_datetime) # TODO: parametrize over the scalar being added? radd? sub? offset = dates + pd.offsets.Hour(5) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index c52112a4fa147..3595cf7a2522f 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -579,15 +579,15 @@ def test_pi_add_offset_n_gt1(self, box): result = per.freq + pi tm.assert_equal(result, expected) - def test_pi_add_offset_n_gt1_not_divisible(self, box): + def test_pi_add_offset_n_gt1_not_divisible(self, box_with_period): # GH#23215 # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 pi = pd.PeriodIndex(['2016-01'], freq='2M') - pi = tm.box_expected(pi, box) + pi = tm.box_expected(pi, box_with_period) expected = pd.PeriodIndex(['2016-04'], freq='2M') - expected = tm.box_expected(expected, box) + expected = tm.box_expected(expected, box_with_period) result = pi + to_offset('3M') tm.assert_equal(result, expected) @@ -901,10 +901,10 @@ def test_pi_ops(self): tm.assert_index_equal(result, exp) @pytest.mark.parametrize('ng', ["str", 1.5]) - def test_pi_ops_errors(self, ng, box): + def test_pi_ops_errors(self, ng, box_with_period): idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq='M', name='idx') - obj = tm.box_expected(idx, box) + obj = tm.box_expected(idx, box_with_period) msg = r"unsupported operand type\(s\)" with tm.assert_raises_regex(TypeError, msg): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py new file mode 100644 index 0000000000000..a15295cfbd81a --- /dev/null +++ b/pandas/tests/arrays/test_datetimes.py @@ -0,0 +1,43 @@ +""" +Tests for DatetimeArray +""" +import operator + +import numpy as np + +import pandas as pd +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray +import pandas.util.testing as tm + + +class TestDatetimeArrayComparisons(object): + # TODO: merge this into tests/arithmetic/test_datetime64 once it is + # sufficiently robust + + def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): + # arbitrary tz-naive DatetimeIndex + opname = all_compare_operators.strip('_') + op = getattr(operator, opname) + + dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=None) + arr = DatetimeArray(dti) + assert arr.freq == dti.freq + assert arr.tz == dti.tz + + right = dti + + expected = np.ones(len(arr), dtype=bool) + if opname in ['ne', 'gt', 'lt']: + # for these the comparisons should be all-False + expected = ~expected + + result = op(arr, arr) + tm.assert_numpy_array_equal(result, expected) + for other in [right, np.array(right)]: + # TODO: add list and tuple, and object-dtype once those + # are fixed in the constructor + result = op(arr, other) + tm.assert_numpy_array_equal(result, expected) + + result = op(other, arr) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 38d1143f3838b..53fa482bdeaef 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -19,6 +19,8 @@ class TestABCClasses(object): sparse_series = pd.Series([1, 2, 3]).to_sparse() sparse_array = pd.SparseArray(np.random.randn(10)) sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]}) + datetime_array = pd.core.arrays.DatetimeArrayMixin(datetime_index) + timedelta_array = pd.core.arrays.TimedeltaArrayMixin(timedelta_index) def test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) @@ -51,6 +53,12 @@ def test_abc_types(self): assert isinstance(pd.Interval(0, 1.5), gt.ABCInterval) assert not isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCInterval) + assert isinstance(self.datetime_array, gt.ABCDatetimeArray) + assert not isinstance(self.datetime_index, gt.ABCDatetimeArray) + + assert isinstance(self.timedelta_array, gt.ABCTimedeltaArray) + assert not isinstance(self.timedelta_index, gt.ABCTimedeltaArray) + def test_setattr_warnings(): # GH7175 - GOTCHA: You can't use dot notation to add a column... diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 96387349eecd7..09c5a68ec28c2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -34,7 +34,9 @@ IntervalIndex, MultiIndex, Panel, PeriodIndex, RangeIndex, Series, TimedeltaIndex, bdate_range) from pandas.core.algorithms import take_1d -from pandas.core.arrays import ExtensionArray, IntervalArray, PeriodArray +from pandas.core.arrays import ( + DatetimeArrayMixin as DatetimeArray, ExtensionArray, IntervalArray, + PeriodArray, period_array) import pandas.core.common as com from pandas.io.common import urlopen @@ -1049,6 +1051,15 @@ def assert_period_array_equal(left, right, obj='PeriodArray'): assert_attr_equal('freq', left, right, obj=obj) +def assert_datetime_array_equal(left, right, obj='DatetimeArray'): + _check_isinstance(left, right, DatetimeArray) + + assert_numpy_array_equal(left._data, right._data, + obj='{obj}._data'.format(obj=obj)) + assert_attr_equal('freq', left, right, obj=obj) + assert_attr_equal('tz', left, right, obj=obj) + + def raise_assert_detail(obj, message, left, right, diff=None): __tracebackhide__ = True @@ -1546,6 +1557,8 @@ def assert_equal(left, right, **kwargs): assert_interval_array_equal(left, right, **kwargs) elif isinstance(left, PeriodArray): assert_period_array_equal(left, right, **kwargs) + elif isinstance(left, DatetimeArray): + assert_datetime_array_equal(left, right, **kwargs) elif isinstance(left, ExtensionArray): assert_extension_array_equal(left, right, **kwargs) elif isinstance(left, np.ndarray): @@ -1573,6 +1586,11 @@ def box_expected(expected, box_cls): expected = pd.Series(expected) elif box_cls is pd.DataFrame: expected = pd.Series(expected).to_frame() + elif box_cls is PeriodArray: + # the PeriodArray constructor is not as flexible as period_array + expected = period_array(expected) + elif box_cls is DatetimeArray: + expected = DatetimeArray(expected) elif box_cls is np.ndarray: expected = np.array(expected) else: