From fb2d0deff622c76c03818fe7e449352fcdc066bf Mon Sep 17 00:00:00 2001 From: Gabriel Reid Date: Sun, 9 Dec 2018 23:25:40 +0100 Subject: [PATCH] BUG: date_range issue with millisecond resolution (#24129) Fixes #24110 by avoiding floating-point rounding issues with millisecond resolution or higher timestamps when creating a date range. --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/arrays/datetimes.py | 7 ++++++- pandas/tests/indexes/datetimes/test_date_range.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index cd39618e4850a8..6b8d548251061d 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1285,6 +1285,7 @@ Datetimelike - Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) - Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`) - Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) +- Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b74ede4547249d..72ab400b41c0c4 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -307,7 +307,12 @@ def _generate_range(cls, start, end, periods, freq, tz=None, end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time - arr = np.linspace(start.value, end.value, periods) + # Nanosecond-granularity 
timestamps aren't always correctly + # representable with doubles, so we limit the range that we + # pass to np.linspace as much as possible + arr = np.linspace( + 0, end.value - start.value, + periods, dtype='int64') + start.value index = cls._simple_new( arr.astype('M8[ns]', copy=False), freq=None, tz=tz ) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 54a04ab6f80fd9..11cefec4f34cf6 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -769,3 +769,14 @@ def test_all_custom_freq(self, freq): msg = 'invalid custom frequency string: {freq}' with pytest.raises(ValueError, match=msg.format(freq=bad_freq)): bdate_range(START, END, freq=bad_freq) + + @pytest.mark.parametrize('start_end', [ + ('2018-01-01T00:00:01.000Z', '2018-01-03T00:00:01.000Z'), + ('2018-01-01T00:00:00.010Z', '2018-01-03T00:00:00.010Z'), + ('2001-01-01T00:00:00.010Z', '2001-01-03T00:00:00.010Z')]) + def test_range_with_millisecond_resolution(self, start_end): + # https://github.com/pandas-dev/pandas/issues/24110 + start, end = start_end + result = pd.date_range(start=start, end=end, periods=2, closed='left') + expected = DatetimeIndex([start]) + tm.assert_index_equal(result, expected)