Skip to content

Commit

Permalink
BUG: DatetimeIndex with dayfirst/yearfirst and tz (pandas-dev#55813)
Browse files Browse the repository at this point in the history
* BUG: DatetimeIndex with dayfirst/yearfirst and tz

* GH ref
  • Loading branch information
jbrockmendel committed Nov 3, 2023
1 parent 5f5ee75 commit c2cdeaf
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 23 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ Categorical

Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DatetimeIndex` construction where ``dayfirst`` and ``yearfirst`` were ignored when a ``tz`` was also passed (:issue:`55813`)
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
Expand Down
6 changes: 5 additions & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,9 @@ def array_to_datetime(
# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
values: npt.NDArray[np.object_],
tz: tzinfo,
dayfirst: bool,
yearfirst: bool,
creso: int,
) -> npt.NDArray[np.int64]: ...
24 changes: 11 additions & 13 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
convert_str_to_tsobject,
convert_to_tsobject,
get_datetime64_nanos,
parse_pydatetime,
)
Expand Down Expand Up @@ -673,7 +674,9 @@ cdef _array_to_datetime_object(
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
def array_to_datetime_with_tz(
ndarray values, tzinfo tz, bint dayfirst, bint yearfirst, NPY_DATETIMEUNIT creso
):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)
Expand All @@ -689,7 +692,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
Py_ssize_t i, n = values.size
object item
int64_t ival
datetime ts
_TSObject tsobj

for i in range(n):
# Analogous to `item = values[i]`
Expand All @@ -700,17 +703,12 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
ival = NPY_NAT

else:
if PyDateTime_Check(item) and item.tzinfo is not None:
# We can't call Timestamp constructor with a tz arg, have to
# do 2-step
ts = Timestamp(item).tz_convert(tz)
else:
ts = Timestamp(item, tz=tz)
if ts is NaT:
ival = NPY_NAT
else:
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value
tsobj = convert_to_tsobject(
item, tz=tz, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst, nanos=0
)
if tsobj.value != NPY_NAT:
tsobj.ensure_reso(creso, item, round_ok=True)
ival = tsobj.value

# Analogous to: result[i] = ival
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ cdef class _TSObject:
bint fold
NPY_DATETIMEUNIT creso

cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=*) except? -1
cdef int64_t ensure_reso(
self, NPY_DATETIMEUNIT creso, val=*, bint round_ok=*
) except? -1


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down
13 changes: 11 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,14 @@ cdef class _TSObject:
self.fold = 0
self.creso = NPY_FR_ns # default value

cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=None) except? -1:
cdef int64_t ensure_reso(
self, NPY_DATETIMEUNIT creso, val=None, bint round_ok=False
) except? -1:
if self.creso != creso:
try:
self.value = convert_reso(self.value, self.creso, creso, False)
self.value = convert_reso(
self.value, self.creso, creso, round_ok=round_ok
)
except OverflowError as err:
if val is not None:
raise OutOfBoundsDatetime(
Expand Down Expand Up @@ -283,6 +287,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
obj.value = get_datetime64_nanos(ts, reso)
if obj.value != NPY_NAT:
pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
if tz is not None:
# GH#24559, GH#42288 We treat np.datetime64 objects as *wall* times
obj.value = tz_localize_to_utc_single(
obj.value, tz, ambiguous="raise", nonexistent=None, creso=reso
)
elif is_integer_object(ts):
try:
ts = <int64_t>ts
Expand Down
4 changes: 0 additions & 4 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1873,10 +1873,6 @@ class Timestamp(_Timestamp):
"the tz parameter. Use tz_convert instead.")

tzobj = maybe_get_tz(tz)
if tzobj is not None and is_datetime64_object(ts_input):
# GH#24559, GH#42288 As of 2.0 we treat datetime64 as
# wall-time (consistent with DatetimeIndex)
return cls(ts_input).tz_localize(tzobj)

if nanosecond is None:
nanosecond = 0
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2240,10 +2240,13 @@ def _sequence_to_dt64(
if lib.infer_dtype(data, skipna=False) == "integer":
data = data.astype(np.int64)
elif tz is not None and ambiguous == "raise":
# TODO: yearfirst/dayfirst/etc?
obj_data = np.asarray(data, dtype=object)
i8data = tslib.array_to_datetime_with_tz(
obj_data, tz, abbrev_to_npy_unit(out_unit)
obj_data,
tz=tz,
dayfirst=dayfirst,
yearfirst=yearfirst,
creso=abbrev_to_npy_unit(out_unit),
)
return i8data.view(out_dtype), tz, None
else:
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,21 @@ def test_datetimeindex_constructor_misc(self):
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq

def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self):
# GH#55813
# Regression test: dayfirst/yearfirst must be honoured when a tz is passed
# to the DatetimeIndex constructor together with string data.
# The same ambiguous date string is parsed under each flag below.
val = "5/10/16"

# Expected readings of "5/10/16", both tz-aware in US/Pacific:
# day-first -> 5 Oct 2016, year-first -> 16 Oct 2005.
dfirst = Timestamp(2016, 10, 5, tz="US/Pacific")
yfirst = Timestamp(2005, 10, 16, tz="US/Pacific")

# dayfirst=True combined with tz
result1 = DatetimeIndex([val], tz="US/Pacific", dayfirst=True)
expected1 = DatetimeIndex([dfirst])
tm.assert_index_equal(result1, expected1)

# yearfirst=True combined with tz
result2 = DatetimeIndex([val], tz="US/Pacific", yearfirst=True)
expected2 = DatetimeIndex([yfirst])
tm.assert_index_equal(result2, expected2)

def test_pass_datetimeindex_to_index(self):
# Bugs in #1396
rng = date_range("1/1/2000", "3/1/2000")
Expand Down

0 comments on commit c2cdeaf

Please sign in to comment.