diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 65532c4cbd27c..0b1eb1760b94f 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -343,6 +343,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ +- Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst`` ignoring dayfirst/yearfirst (:issue:`55813`) - Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`) - Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) - Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`) diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index a803ea0692c74..5a340c1d88bc4 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -29,5 +29,9 @@ def array_to_datetime( # returned ndarray may be object dtype or datetime64[ns] def array_to_datetime_with_tz( - values: npt.NDArray[np.object_], tz: tzinfo, creso: int + values: npt.NDArray[np.object_], + tz: tzinfo, + dayfirst: bool, + yearfirst: bool, + creso: int, ) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3c694ab26d912..da9d65bcfc095 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -61,6 +61,7 @@ from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, convert_str_to_tsobject, + convert_to_tsobject, get_datetime64_nanos, parse_pydatetime, ) @@ -673,7 +674,9 @@ cdef _array_to_datetime_object( return oresult_nd, None -def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso): +def array_to_datetime_with_tz( + ndarray values, tzinfo tz, bint dayfirst, bint yearfirst, NPY_DATETIMEUNIT creso +): """ Vectorized analogue to pd.Timestamp(value, tz=tz) @@ -689,7 +692,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso) Py_ssize_t i, n = values.size object item int64_t ival - datetime ts + _TSObject tsobj for i in range(n): # Analogous to `item = values[i]` @@ -700,17 +703,12 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso) ival = NPY_NAT else: - if PyDateTime_Check(item) and item.tzinfo is not None: - # We can't call Timestamp constructor with a tz arg, have to - # do 2-step - ts = Timestamp(item).tz_convert(tz) - else: - ts = Timestamp(item, tz=tz) - if ts is NaT: - ival = NPY_NAT - else: - ts = (<_Timestamp>ts)._as_creso(creso) - ival = ts._value + tsobj = convert_to_tsobject( + item, tz=tz, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst, nanos=0 + ) + if tsobj.value != NPY_NAT: + tsobj.ensure_reso(creso, item, round_ok=True) + ival = tsobj.value # Analogous to: result[i] = ival (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index ec743dbf98f6b..e97f4900d20c8 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -24,7 +24,9 @@ cdef class _TSObject: bint fold NPY_DATETIMEUNIT creso - cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=*) except? -1 + cdef int64_t ensure_reso( + self, NPY_DATETIMEUNIT creso, val=*, bint round_ok=* + ) except? -1 cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 68aaaafd208d4..a2bc6d4d52f2e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -235,10 +235,14 @@ cdef class _TSObject: self.fold = 0 self.creso = NPY_FR_ns # default value - cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=None) except? -1: + cdef int64_t ensure_reso( + self, NPY_DATETIMEUNIT creso, val=None, bint round_ok=False + ) except? -1: if self.creso != creso: try: - self.value = convert_reso(self.value, self.creso, creso, False) + self.value = convert_reso( + self.value, self.creso, creso, round_ok=round_ok + ) except OverflowError as err: if val is not None: raise OutOfBoundsDatetime( @@ -283,6 +287,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, obj.value = get_datetime64_nanos(ts, reso) if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts) + if tz is not None: + # GH#24559, GH#42288 We treat np.datetime64 objects as *wall* times + obj.value = tz_localize_to_utc_single( + obj.value, tz, ambiguous="raise", nonexistent=None, creso=reso + ) elif is_integer_object(ts): try: ts = ts diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 01aea60268574..d0c3c5c23b272 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1873,10 +1873,6 @@ class Timestamp(_Timestamp): "the tz parameter. Use tz_convert instead.") tzobj = maybe_get_tz(tz) - if tzobj is not None and is_datetime64_object(ts_input): - # GH#24559, GH#42288 As of 2.0 we treat datetime64 as - # wall-time (consistent with DatetimeIndex) - return cls(ts_input).tz_localize(tzobj) if nanosecond is None: nanosecond = 0 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 968b64e2c3de3..86402fe05e08a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2240,10 +2240,13 @@ def _sequence_to_dt64( if lib.infer_dtype(data, skipna=False) == "integer": data = data.astype(np.int64) elif tz is not None and ambiguous == "raise": - # TODO: yearfirst/dayfirst/etc? obj_data = np.asarray(data, dtype=object) i8data = tslib.array_to_datetime_with_tz( - obj_data, tz, abbrev_to_npy_unit(out_unit) + obj_data, + tz=tz, + dayfirst=dayfirst, + yearfirst=yearfirst, + creso=abbrev_to_npy_unit(out_unit), ) return i8data.view(out_dtype), tz, None else: diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ab4328788507f..f717165d118b6 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1247,6 +1247,21 @@ def test_datetimeindex_constructor_misc(self): assert len(idx1) == len(idx2) assert idx1.freq == idx2.freq + def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self): + # GH#55813 + val = "5/10/16" + + dfirst = Timestamp(2016, 10, 5, tz="US/Pacific") + yfirst = Timestamp(2005, 10, 16, tz="US/Pacific") + + result1 = DatetimeIndex([val], tz="US/Pacific", dayfirst=True) + expected1 = DatetimeIndex([dfirst]) + tm.assert_index_equal(result1, expected1) + + result2 = DatetimeIndex([val], tz="US/Pacific", yearfirst=True) + expected2 = DatetimeIndex([yfirst]) + tm.assert_index_equal(result2, expected2) + def test_pass_datetimeindex_to_index(self): # Bugs in #1396 rng = date_range("1/1/2000", "3/1/2000")