Skip to content

Commit

Permalink
BUG: Casting tz-aware DatetimeIndex to object-dtype ndarray/Index (pa…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 207b13b commit 4821e80
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 14 deletions.
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,9 @@ Datetimelike
- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`)
- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`)
- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`)
- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`)
- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`)

Timedelta
^^^^^^^^^
Expand Down Expand Up @@ -1174,6 +1177,7 @@ Offsets
- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
- Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`)
- Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`)
- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`)

Numeric
^^^^^^^
Expand Down
9 changes: 7 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,13 @@ class _BaseOffset(object):

def __eq__(self, other):
if is_string_object(other):
other = to_offset(other)

try:
# GH#23524 if to_offset fails, we are dealing with an
# incomparable type so == is False and != is True
other = to_offset(other)
except ValueError:
# e.g. "infer"
return False
try:
return self._params == other._params
except AttributeError:
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.dtypes.common import (
_NS_DTYPE,
is_object_dtype,
is_int64_dtype,
is_datetime64tz_dtype,
is_datetime64_dtype,
ensure_int64)
Expand Down Expand Up @@ -388,6 +389,15 @@ def _resolution(self):
# ----------------------------------------------------------------
# Array-like Methods

def __array__(self, dtype=None):
    """
    Return the values as a NumPy ndarray.

    Parameters
    ----------
    dtype : dtype-like, optional
        Requested dtype.  Object and int64 requests get dedicated
        handling; any other value (including None) falls through to a
        plain ndarray view of the underlying data.

    Returns
    -------
    numpy.ndarray
    """
    if is_object_dtype(dtype):
        # Iterating yields the boxed elements, which are then collected
        # into an object-dtype array.
        boxed = list(self)
        return np.array(boxed, dtype=object)

    if is_int64_dtype(dtype):
        # Hand back the i8 representation directly.
        return self.asi8

    # TODO: warn that conversion may be lossy?
    # Mirrors Index.__array__ by viewing the stored data as an ndarray.
    values = self._data
    return values.view(np.ndarray)

def __iter__(self):
"""
Return an iterator over the boxed values
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
(dtype is not None and is_datetime64_any_dtype(dtype)) or
'tz' in kwargs):
from pandas import DatetimeIndex
result = DatetimeIndex(data, copy=copy, name=name,
dtype=dtype, **kwargs)

if dtype is not None and is_dtype_equal(_o_dtype, dtype):
return Index(result.to_pydatetime(), dtype=_o_dtype)
# GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
# will raise in the case where `data` is already tz-aware. So
# we leave it out of this step and cast to object-dtype after
# the DatetimeIndex construction.
# Note we can pass copy=False because the .astype below
# will always make a copy
result = DatetimeIndex(data, copy=False, name=name, **kwargs)
return result.astype(object)
else:
result = DatetimeIndex(data, copy=copy, name=name,
dtype=dtype, **kwargs)
return result

elif (is_timedelta64_dtype(data) or
Expand Down
48 changes: 48 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,54 @@ def timedelta_index(request):

class TestDatetimeArray(object):

def test_array_object_dtype(self, tz_naive_fixture):
    """
    np.array(..., dtype=object) must match the values obtained by
    iterating the index (GH#23524).
    """
    index = pd.date_range('2016-01-01', periods=3, tz=tz_naive_fixture)
    wrapped = DatetimeArrayMixin(index)

    expected = np.array(list(index))

    # Check the array wrapper first, then the DatetimeIndex itself while
    # we're at it; both must give the same ndarray.
    for obj in (wrapped, index):
        converted = np.array(obj, dtype=object)
        tm.assert_numpy_array_equal(converted, expected)

def test_array(self, tz_naive_fixture):
    """
    np.array(arr) with no dtype must equal the M8[ns] view of the i8
    values, and copy=False must not copy (GH#23524).
    """
    index = pd.date_range('2016-01-01', periods=3, tz=tz_naive_fixture)
    wrapped = DatetimeArrayMixin(index)
    expected = index.asi8.view('M8[ns]')

    converted = np.array(wrapped)
    tm.assert_numpy_array_equal(converted, expected)

    # check that we are not making copies when setting copy=False: the
    # result must share its base buffer with the expected view
    uncopied = np.array(wrapped, copy=False)
    assert uncopied.base is expected.base
    assert uncopied.base is not None

def test_array_i8_dtype(self, tz_naive_fixture):
    """
    Requesting an int64 dtype must return the i8 values, without copying
    when copy=False is passed (GH#23524).
    """
    index = pd.date_range('2016-01-01', periods=3, tz=tz_naive_fixture)
    wrapped = DatetimeArrayMixin(index)
    expected = index.asi8

    # both spellings of the int64 dtype must behave the same
    for i8_dtype in ('i8', np.int64):
        converted = np.array(wrapped, dtype=i8_dtype)
        tm.assert_numpy_array_equal(converted, expected)

    # check that we are not making copies when setting copy=False
    uncopied = np.array(wrapped, dtype='i8', copy=False)
    assert uncopied.base is expected.base
    assert uncopied.base is not None

def test_from_dti(self, tz_naive_fixture):
tz = tz_naive_fixture
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype):
@pytest.mark.parametrize("cast_as_obj", [True, False])
@pytest.mark.parametrize("index", [
pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern'), # DTI with tz
tz='US/Eastern', name='Green Eggs & Ham'), # DTI with tz
pd.date_range('2015-01-01 10:00', freq='D', periods=3), # DTI no tz
pd.timedelta_range('1 days', freq='D', periods=3), # td
pd.period_range('2015-01-01', freq='D', periods=3) # period
Expand All @@ -145,8 +145,16 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index):

tm.assert_index_equal(result, index)

if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'):
if isinstance(index, pd.DatetimeIndex):
assert result.tz == index.tz
if cast_as_obj:
# GH#23524 check that Index(dti, dtype=object) does not
# incorrectly raise ValueError, and that nanoseconds are not
# dropped
index += pd.Timedelta(nanoseconds=50)
result = pd.Index(index, dtype=object)
assert result.dtype == np.object_
assert list(result) == list(index)

@pytest.mark.parametrize("index,has_tz", [
(pd.date_range('2015-01-01 10:00', freq='D', periods=3,
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,19 @@ def testMult2(self):
assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50)
assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)

def test_compare_str(self):
    """
    Comparing an offset with a string that cannot be cast to a
    DateOffset must not raise for __eq__ or __ne__ (GH#23524).
    """
    if self._offset is not None:
        offset = self._get_offset(self._offset)

        assert not offset == "infer"
        assert offset != "foo"
        # Note: inequalities are only implemented for Tick subclasses;
        # tests for this are in test_ticks


class TestCommon(Base):
# expected value created by Base._get_offset
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/tseries/offsets/test_ticks.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,25 @@ def test_compare_ticks(cls):
assert cls(4) > three
assert cls(3) == cls(3)
assert cls(3) != cls(4)


@pytest.mark.parametrize('cls', tick_classes)
def test_compare_ticks_to_strs(cls):
    """
    Ticks compare unequal to non-offset strings, and ordering
    comparisons against such strings raise TypeError (GH#23524).
    """
    tick = cls(19)

    # These tests should work with any strings, but we particularly are
    # interested in "infer" as that comparison is convenient to make in
    # Datetime/Timedelta Array/Index constructors
    assert not tick == "infer"
    assert not "foo" == tick

    # The four ordering comparisons, applied in the same order to each
    # operand arrangement.
    orderings = [
        lambda lhs, rhs: lhs < rhs,
        lambda lhs, rhs: lhs <= rhs,
        lambda lhs, rhs: lhs > rhs,
        lambda lhs, rhs: lhs >= rhs,
    ]
    for left, right in [("infer", tick), (tick, "infer")]:
        for compare in orderings:
            with pytest.raises(TypeError):
                compare(left, right)
31 changes: 24 additions & 7 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2199,9 +2199,18 @@ def apply_index(self, i):


def _tick_comp(op):
    """
    Build an ordering-comparison method that compares ``delta`` attributes.

    Parameters
    ----------
    op : callable
        Binary comparison function from the ``operator`` module.  Must not
        be ``operator.eq`` or ``operator.ne``; equality is not meant to be
        generated through this factory.

    Returns
    -------
    callable
        Function ``f(self, other)`` named ``__<op name>__`` that returns
        ``op(self.delta, other.delta)``.

    Notes
    -----
    The returned function raises ``TypeError`` when an operand lacks a
    ``delta`` attribute (e.g. when comparing with a string).
    """
    assert op not in [operator.eq, operator.ne]

    def f(self, other):
        try:
            return op(self.delta, other.delta)
        except AttributeError:
            # comparing with a non-Tick object; surface this as the
            # conventional TypeError for unsupported comparisons rather
            # than leaking the AttributeError
            raise TypeError("Invalid comparison between {cls} and {typ}"
                            .format(cls=type(self).__name__,
                                    typ=type(other).__name__))

    # give the generated method the dunder name matching the operator
    f.__name__ = '__{opname}__'.format(opname=op.__name__)
    return f


Expand All @@ -2220,8 +2229,6 @@ def __init__(self, n=1, normalize=False):
__ge__ = _tick_comp(operator.ge)
__lt__ = _tick_comp(operator.lt)
__le__ = _tick_comp(operator.le)
__eq__ = _tick_comp(operator.eq)
__ne__ = _tick_comp(operator.ne)

def __add__(self, other):
if isinstance(other, Tick):
Expand All @@ -2242,8 +2249,13 @@ def __add__(self, other):
def __eq__(self, other):
if isinstance(other, compat.string_types):
from pandas.tseries.frequencies import to_offset

other = to_offset(other)
try:
# GH#23524 if to_offset fails, we are dealing with an
# incomparable type so == is False and != is True
other = to_offset(other)
except ValueError:
# e.g. "infer"
return False

if isinstance(other, Tick):
return self.delta == other.delta
Expand All @@ -2258,8 +2270,13 @@ def __hash__(self):
def __ne__(self, other):
if isinstance(other, compat.string_types):
from pandas.tseries.frequencies import to_offset

other = to_offset(other)
try:
# GH#23524 if to_offset fails, we are dealing with an
# incomparable type so == is False and != is True
other = to_offset(other)
except ValueError:
# e.g. "infer"
return True

if isinstance(other, Tick):
return self.delta != other.delta
Expand Down

0 comments on commit 4821e80

Please sign in to comment.