Skip to content

Commit

Permalink
implement deprecation portion of pandas-dev#23675 (pandas-dev#23937)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 727e3c9 commit 458ccab
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 28 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,7 @@ Deprecations
`use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`)
- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)
- Constructing a :class:`DatetimeIndex` from ``timedelta64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23675`)
- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of
:meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`).
- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`)
Expand Down
82 changes: 80 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype,
is_object_dtype)
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype,
is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -1421,6 +1422,83 @@ def to_julian_date(self):
DatetimeArrayMixin._add_datetimelike_methods()


# -------------------------------------------------------------------
# Constructor Helpers

def maybe_infer_tz(tz, inferred_tz):
    """
    Reconcile a user-provided timezone with one inferred from the data.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone explicitly requested by the caller, if any.
    inferred_tz : tzinfo or None
        Timezone inferred from the data, if any.

    Returns
    -------
    tz : tzinfo or None
        ``inferred_tz`` when no timezone was requested, otherwise ``tz``.

    Raises
    ------
    TypeError
        If both timezones are present but do not match.
    """
    if tz is None:
        # Nothing was requested, so whatever the data carries wins
        # (this may itself be None).
        return inferred_tz

    if inferred_tz is not None and not timezones.tz_compare(tz, inferred_tz):
        msg = ('data is already tz-aware {inferred_tz}, unable to '
               'set specified tz: {tz}')
        raise TypeError(msg.format(inferred_tz=inferred_tz, tz=tz))

    return tz


def maybe_convert_dtype(data, copy):
    """
    Normalize ``data`` according to its dtype before datetime construction,
    warning or raising for dtypes that should not be passed.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool

    Returns
    -------
    data : np.ndarray or pd.Index
        Possibly converted data.
    copy : bool
        Set to False whenever the conversion already produced a new object;
        otherwise passed through unchanged.

    Raises
    ------
    TypeError
        If PeriodDtype data is passed.
    """
    if is_float_dtype(data):
        # Note: we must cast to datetime64[ns] here in order to treat these
        # as wall-times instead of UTC timestamps.
        # TODO: deprecate this behavior to instead treat symmetrically
        # with integer dtypes. See discussion in GH#23675
        return data.astype(_NS_DTYPE), False

    if is_timedelta64_dtype(data):
        warnings.warn("Passing timedelta64-dtype data is deprecated, will "
                      "raise a TypeError in a future version",
                      FutureWarning, stacklevel=3)
        # Reinterpret the values as datetimes; ``copy`` is left as
        # the caller requested.
        return data.view(_NS_DTYPE), copy

    if is_period_dtype(data):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError("Passing PeriodDtype data is invalid. "
                        "Use `data.to_timestamp()` instead")

    if is_extension_type(data) and not is_datetime64tz_dtype(data):
        # Includes categorical
        # TODO: We have no tests for these
        return np.array(data, dtype=np.object_), False

    # Anything else is handed back untouched.
    return data, copy


def _generate_regular_range(cls, start, end, periods, freq):
"""
Generate a range of dates with the spans between dates described by
Expand Down
50 changes: 25 additions & 25 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,17 @@

from pandas.core.dtypes.common import (
_INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype,
is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float,
is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar,
is_string_like, pandas_dtype)
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimetz,
is_dtype_equal, is_float, is_integer, is_integer_dtype, is_list_like,
is_period_dtype, is_scalar, is_string_like, pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.datetimes import (
DatetimeArrayMixin as DatetimeArray, _to_m8)
DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype,
maybe_infer_tz)
from pandas.core.base import _shared_docs
import pandas.core.common as com
from pandas.core.indexes.base import Index, _index_shared_docs
Expand Down Expand Up @@ -246,50 +247,49 @@ def __new__(cls, data=None,
name = data.name

freq, freq_infer = dtl.maybe_infer_freq(freq)
if freq is None and hasattr(data, "freq"):
# i.e. DatetimeArray/Index
freq = data.freq
verify_integrity = False

# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)):
# other iterable of some kind
if not isinstance(data, (list, tuple)):
if not hasattr(data, "dtype"):
# e.g. list, tuple
if np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = np.asarray(data, dtype='O')
data = np.asarray(data)
copy = False
elif isinstance(data, ABCSeries):
data = data._values

# data must be Index or np.ndarray here
# By this point we are assured to have either a numpy array or Index
data, copy = maybe_convert_dtype(data, copy)

if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)

if isinstance(data, DatetimeArray):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if not timezones.tz_compare(tz, data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))

if is_datetime64tz_dtype(data):
tz = maybe_infer_tz(tz, data.tz)
subarr = data._data

if freq is None:
freq = data.freq
verify_integrity = False
elif issubclass(data.dtype.type, np.datetime64):
elif is_datetime64_dtype(data):
# tz-naive DatetimeArray/Index or ndarray[datetime64]
data = getattr(data, "_data", data)
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)

if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
subarr = data.view(_NS_DTYPE)

else:
# must be integer dtype otherwise
# assume this data are epoch timestamps
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
- ndarray of Timestamps if box=False
"""
from pandas import DatetimeIndex
from pandas.core.arrays.datetimes import maybe_convert_dtype

if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')

Expand Down Expand Up @@ -208,6 +210,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
raise TypeError('arg must be a string, datetime, list, tuple, '
'1-d array, or Series')

# warn if passing timedelta64, raise for PeriodDtype
# NB: this must come after unit transformation
orig_arg = arg
arg, _ = maybe_convert_dtype(arg, copy=False)

arg = ensure_object(arg)
require_iso8601 = False

Expand All @@ -231,7 +238,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
# shortcut formatting here
if format == '%Y%m%d':
try:
result = _attempt_YYYYMMDD(arg, errors=errors)
# pass orig_arg as float-dtype may have been converted to
# datetime64[ns]
orig_arg = ensure_object(orig_arg)
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
raise ValueError("cannot convert the input to "
"'%Y%m%d' date format")
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,51 @@
from pandas import (
DatetimeIndex, Index, Timestamp, date_range, datetime, offsets,
to_datetime)
from pandas.core.arrays import period_array
import pandas.util.testing as tm


class TestDatetimeIndex(object):

def test_dti_with_period_data_raises(self):
    # GH#23675
    # Period-dtype input must be rejected by both DatetimeIndex and
    # to_datetime, whether it is passed as a PeriodIndex or as the
    # result of period_array.
    data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q')
    msg = "PeriodDtype data is invalid"

    for wrap in (lambda idx: idx, period_array):
        for constructor in (DatetimeIndex, to_datetime):
            with pytest.raises(TypeError, match=msg):
                constructor(wrap(data))

def test_dti_with_timedelta64_data_deprecation(self):
    # GH#23675
    # timedelta64 input is still accepted but must emit a FutureWarning,
    # both as a raw ndarray and wrapped in a TimedeltaIndex.
    raw = np.array([0], dtype='m8[ns]')
    epoch = Timestamp('1970-01-01')

    for wrap in (lambda arr: arr, pd.TimedeltaIndex):
        with tm.assert_produces_warning(FutureWarning):
            result = DatetimeIndex(wrap(raw))
        assert result[0] == epoch

        # the stacklevel is not verified on the to_datetime path
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = to_datetime(wrap(raw))
        assert result[0] == epoch

def test_construction_caching(self):

df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3),
Expand Down

0 comments on commit 458ccab

Please sign in to comment.