Skip to content

Commit

Permalink
BUG: Indexing with UTC offset string no longer ignored (pandas-dev#25263
Browse files Browse the repository at this point in the history
)
  • Loading branch information
mroeschke authored and Pingviinituutti committed Feb 28, 2019
1 parent a91ec21 commit b18f7b2
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 45 deletions.
10 changes: 10 additions & 0 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,16 @@ We are stopping on the included end-point as it is part of the index:
dft2 = dft2.swaplevel(0, 1).sort_index()
dft2.loc[idx[:, '2013-01-05'], :]
.. versionadded:: 0.25.0

Slicing with date strings also honors a UTC offset included in the string.

.. ipython:: python
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
df
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
.. _timeseries.slice_vs_exact_match:

Slice vs. Exact Match
Expand Down
34 changes: 32 additions & 2 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,37 @@ Other Enhancements
Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- :meth:`Timestamp.strptime` will now raise a NotImplementedError (:issue:`25016`)
.. _whatsnew_0250.api_breaking.utc_offset_indexing:

Indexing with date strings with UTC offsets
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Indexing a :class:`DataFrame` or :class:`Series` that has a :class:`DatetimeIndex` using a
date string containing a UTC offset previously ignored the offset. Now, the UTC offset
is respected in indexing. (:issue:`24076`, :issue:`16785`)

*Previous Behavior*:

.. code-block:: ipython
In [1]: df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
In [2]: df
Out[2]:
0
2019-01-01 00:00:00-08:00 0
In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00']
Out[3]:
0
2019-01-01 00:00:00-08:00 0
*New Behavior*:

.. ipython:: python

df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']

.. _whatsnew_0250.api.other:

Expand All @@ -40,7 +70,7 @@ Other API Changes

- :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`)
- ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`)
-
- :meth:`Timestamp.strptime` will now raise a ``NotImplementedError`` (:issue:`25016`)
-

.. _whatsnew_0250.deprecations:
Expand Down
21 changes: 18 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
import numpy as np

from pandas._libs import (
Timedelta, algos as libalgos, index as libindex, join as libjoin, lib,
tslibs)
algos as libalgos, index as libindex, join as libjoin, lib)
from pandas._libs.lib import is_datetime_array
from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp
from pandas._libs.tslibs.timezones import tz_compare
import pandas.compat as compat
from pandas.compat import range, set_function_name, u
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -447,7 +448,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
try:
return DatetimeIndex(subarr, copy=copy,
name=name, **kwargs)
except tslibs.OutOfBoundsDatetime:
except OutOfBoundsDatetime:
pass

elif inferred.startswith('timedelta'):
Expand Down Expand Up @@ -4867,6 +4868,20 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
# If it's a reverse slice, temporarily swap bounds.
start, end = end, start

# GH 16785: If start and end happen to be date strings with UTC offsets
# attempt to parse and check that the offsets are the same
if (isinstance(start, (compat.string_types, datetime))
and isinstance(end, (compat.string_types, datetime))):
try:
ts_start = Timestamp(start)
ts_end = Timestamp(end)
except (ValueError, TypeError):
pass
else:
if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
raise ValueError("Both dates must have the "
"same UTC offset")

start_slice = None
if start is not None:
start_slice = self.get_slice_bound(start, 'left', kind)
Expand Down
80 changes: 41 additions & 39 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@
from pandas.core.ops import get_op_result_name
import pandas.core.tools.datetimes as tools

from pandas.tseries import offsets
from pandas.tseries.frequencies import Resolution, to_offset
from pandas.tseries.offsets import CDay, prefix_mapping
from pandas.tseries.offsets import CDay, Nano, prefix_mapping


def _new_DatetimeIndex(cls, d):
Expand Down Expand Up @@ -852,54 +851,57 @@ def _parsed_string_to_bounds(self, reso, parsed):
lower, upper: pd.Timestamp
"""
valid_resos = {'year', 'month', 'quarter', 'day', 'hour', 'minute',
'second', 'minute', 'second', 'microsecond'}
if reso not in valid_resos:
raise KeyError
if reso == 'year':
return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz),
Timestamp(datetime(parsed.year, 12, 31, 23,
59, 59, 999999), tz=self.tz))
start = Timestamp(parsed.year, 1, 1)
end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999)
elif reso == 'month':
d = ccalendar.get_days_in_month(parsed.year, parsed.month)
return (Timestamp(datetime(parsed.year, parsed.month, 1),
tz=self.tz),
Timestamp(datetime(parsed.year, parsed.month, d, 23,
59, 59, 999999), tz=self.tz))
start = Timestamp(parsed.year, parsed.month, 1)
end = Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999)
elif reso == 'quarter':
qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead
d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month
return (Timestamp(datetime(parsed.year, parsed.month, 1),
tz=self.tz),
Timestamp(datetime(parsed.year, qe, d, 23, 59,
59, 999999), tz=self.tz))
start = Timestamp(parsed.year, parsed.month, 1)
end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999)
elif reso == 'day':
st = datetime(parsed.year, parsed.month, parsed.day)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Day(),
tz=self.tz).value - 1))
start = Timestamp(parsed.year, parsed.month, parsed.day)
end = start + timedelta(days=1) - Nano(1)
elif reso == 'hour':
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Hour(),
tz=self.tz).value - 1))
start = Timestamp(parsed.year, parsed.month, parsed.day,
parsed.hour)
end = start + timedelta(hours=1) - Nano(1)
elif reso == 'minute':
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour, minute=parsed.minute)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Minute(),
tz=self.tz).value - 1))
start = Timestamp(parsed.year, parsed.month, parsed.day,
parsed.hour, parsed.minute)
end = start + timedelta(minutes=1) - Nano(1)
elif reso == 'second':
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour, minute=parsed.minute,
second=parsed.second)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Second(),
tz=self.tz).value - 1))
start = Timestamp(parsed.year, parsed.month, parsed.day,
parsed.hour, parsed.minute, parsed.second)
end = start + timedelta(seconds=1) - Nano(1)
elif reso == 'microsecond':
st = datetime(parsed.year, parsed.month, parsed.day,
parsed.hour, parsed.minute, parsed.second,
parsed.microsecond)
return (Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz))
else:
raise KeyError
start = Timestamp(parsed.year, parsed.month, parsed.day,
parsed.hour, parsed.minute, parsed.second,
parsed.microsecond)
end = start + timedelta(microseconds=1) - Nano(1)
# GH 24076
# If an incoming date string contained a UTC offset, need to localize
# the parsed date to this offset first before aligning with the index's
# timezone
if parsed.tzinfo is not None:
if self.tz is None:
raise ValueError("The index must be timezone aware "
"when indexing with a date string with a "
"UTC offset")
start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz)
end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz)
elif self.tz is not None:
start = start.tz_localize(self.tz)
end = end.tz_localize(self.tz)
return start, end

def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
is_monotonic = self.is_monotonic
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_stringified_slice_with_tz(self):
# GH#2658
import datetime
start = datetime.datetime.now()
idx = date_range(start=start, freq="1d", periods=10)
idx = date_range(start=start, freq="1d", periods=10, tz='US/Eastern')
df = DataFrame(lrange(10), index=idx)
df["2013-01-14 23:44:34.437768-05:00":] # no exception here

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,3 +396,30 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected):
result = op(df.A, datetimelike)
expected = Series(expected, name='A')
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('start', [
    '2018-12-02 21:50:00+00:00', pd.Timestamp('2018-12-02 21:50:00+00:00'),
    pd.Timestamp('2018-12-02 21:50:00+00:00').to_pydatetime()
])
@pytest.mark.parametrize('end', [
    '2018-12-02 21:52:00+00:00', pd.Timestamp('2018-12-02 21:52:00+00:00'),
    pd.Timestamp('2018-12-02 21:52:00+00:00').to_pydatetime()
])
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
    """Slicing a tz-aware frame with offset-bearing bounds honors the offset.

    ``start`` and ``end`` are given in UTC (as a string, a ``Timestamp`` or
    a ``datetime``) while the index is built with a -07:00 offset, so the
    bounds only line up with the index if their offset is taken into
    account.
    """
    # GH 24076
    # The index deliberately extends past ``end`` (21:52 UTC == 14:52-07:00)
    # so that the slice has to cut rows off; with a single-row index the
    # comparison below would pass regardless of how the bounds are handled.
    idx = pd.date_range(start='2018-12-02 14:50:00-07:00',
                        end='2018-12-02 14:55:00-07:00', freq='1min')
    df = pd.DataFrame(1, index=idx, columns=['A'])
    result = df[start:end]
    # 14:50, 14:51 and 14:52 (-07:00) fall inside [21:50, 21:52] UTC.
    expected = df.iloc[0:3, :]
    tm.assert_frame_equal(result, expected)

    # GH 16785
    start = str(start)
    end = str(end)
    # Rewrite the end offset from "+00:00" to "+01:00" so the two bounds
    # carry different UTC offsets.
    with pytest.raises(ValueError, match="Both dates must"):
        df[start:end[:-4] + '1:00']

    # Build the tz-naive frame outside the ``raises`` block so that only the
    # indexing operation itself can satisfy the expected error.
    df_naive = df.tz_localize(None)
    with pytest.raises(ValueError, match="The index must be timezone"):
        df_naive[start:end]

0 comments on commit b18f7b2

Please sign in to comment.