Skip to content

Commit

Permalink
BUG/REF: TimedeltaIndex.__new__ (pandas-dev#23539)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 3d6ab98 commit 13a3054
Show file tree
Hide file tree
Showing 12 changed files with 330 additions and 103 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ Backwards incompatible API changes

- A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`)
- :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)

.. _whatsnew_0240.api_breaking.deps:

Expand Down Expand Up @@ -969,6 +970,7 @@ Deprecations
- The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
- Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of
`use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):

result = cls._simple_new(values, freq=freq, tz=tz)
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
result.freq = to_offset(result.inferred_freq)

# NB: Among other things not yet ported from the DatetimeIndex
# constructor, this does not call _deepcopy_if_needed
Expand Down
179 changes: 172 additions & 7 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import warnings

import numpy as np

from pandas._libs import tslibs
from pandas._libs.tslibs import Timedelta, Timestamp, NaT
from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._libs.tslibs.timedeltas import (
array_to_timedelta64, parse_timedelta_unit)

from pandas import compat

from pandas.core.dtypes.common import (
_TD_DTYPE, is_list_like)
from pandas.core.dtypes.generic import ABCSeries
_TD_DTYPE,
is_object_dtype,
is_string_dtype,
is_float_dtype,
is_integer_dtype,
is_timedelta64_dtype,
is_datetime64_dtype,
is_list_like,
ensure_int64)
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
from pandas.core.dtypes.missing import isna

import pandas.core.common as com
Expand Down Expand Up @@ -139,9 +149,7 @@ def __new__(cls, values, freq=None):

result = cls._simple_new(values, freq=freq)
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
result.freq = to_offset(result.inferred_freq)

return result

Expand Down Expand Up @@ -397,6 +405,163 @@ def f(x):
# ---------------------------------------------------------------------
# Constructor Helpers

def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
    """
    Convert a list-like of timedelta-like data to a timedelta64[ns] ndarray.

    Parameters
    ----------
    data : list-like
    copy : bool, default False
    unit : str, default "ns"
        The timedelta unit that integer and float data are treated as
        multiples of.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        Only forwarded to the object-dtype / string-dtype conversion.

    Returns
    -------
    converted : ndarray[timedelta64[ns]]
    inferred_freq : Tick or None
        The ``freq`` of `data` when it is a TimedeltaIndex or
        TimedeltaArrayMixin, otherwise None.

    Raises
    ------
    ValueError : data cannot be converted to timedelta64[ns]
    TypeError : data has a dtype that cannot be interpreted as timedeltas

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting `errors="ignore"` here will not
    cause errors to be ignored; they are caught and subsequently ignored at
    a higher level.
    """
    inferred_freq = None
    unit = parse_timedelta_unit(unit)

    # Unwrap whatever we have into a np.ndarray
    if not hasattr(data, 'dtype'):
        # e.g. list, tuple
        if np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
        # np.asarray copies only when it has to.  np.array(..., copy=False)
        # means "never copy" under NumPy >= 2 and raises for list input.
        data = np.asarray(data)
    elif isinstance(data, ABCSeries):
        data = data._values
    elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)):
        inferred_freq = data.freq
        data = data._data

    # Convert whatever we have into timedelta64[ns] dtype
    if is_object_dtype(data) or is_string_dtype(data):
        # no need to make a copy, need to convert if string-dtyped
        data = objects_to_td64ns(data, unit=unit, errors=errors)
        copy = False

    elif is_integer_dtype(data):
        # treat as multiples of the given unit
        data, copy_made = ints_to_td64ns(data, unit=unit)
        copy = copy and not copy_made

    elif is_float_dtype(data):
        # treat as multiples of the given unit.  If after converting to nanos,
        # there are fractional components left, these are truncated
        # (i.e. NOT rounded)
        mask = np.isnan(data)
        coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
        data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
        # NaN has no integer representation; restore missing values as NaT
        data[mask] = iNaT
        # the multiplication/astype above already produced a new array
        copy = False

    elif is_timedelta64_dtype(data):
        if data.dtype != _TD_DTYPE:
            # non-nano unit
            # TODO: watch out for overflows
            data = data.astype(_TD_DTYPE)
            # astype made a copy, so we can avoid re-copying below
            copy = False

    elif is_datetime64_dtype(data):
        # GH#23539
        warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is "
                      "deprecated, will raise a TypeError in a future "
                      "version",
                      FutureWarning, stacklevel=3)
        data = ensure_int64(data).view(_TD_DTYPE)

    else:
        raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]"
                        .format(dtype=data.dtype))

    # Honor the (possibly downgraded) copy request without relying on the
    # version-dependent meaning of np.array(..., copy=False)
    if copy:
        data = np.array(data, copy=True)
    else:
        data = np.asarray(data)
    assert data.dtype == 'm8[ns]', data
    return data, inferred_freq


def ints_to_td64ns(data, unit="ns"):
    """
    Reinterpret an integer ndarray as timedelta64[ns], reading each integer
    as a count of `unit`.

    Parameters
    ----------
    data : np.ndarray with integer-dtype
    unit : str, default "ns"
        None is treated the same as "ns".

    Returns
    -------
    ndarray[timedelta64[ns]]
    bool : whether a copy was made
    """
    if unit is None:
        unit = "ns"

    # Any integer dtype other than int64 has to be re-cast first; that cast
    # allocates, so the caller does not need to copy again afterwards.
    recast = data.dtype != np.int64
    values = data.astype(np.int64) if recast else data

    if unit == "ns":
        # already nanoseconds: a plain reinterpretation, no extra allocation
        return values.view("timedelta64[ns]"), recast

    in_unit = values.view("timedelta64[{unit}]".format(unit=unit))

    # TODO: watch out for overflows when converting from lower-resolution
    # The resolution change below always produces a new array.
    return in_unit.astype("timedelta64[ns]"), True


def objects_to_td64ns(data, unit="ns", errors="raise"):
    """
    Convert an object-dtyped or string-dtyped array into a
    timedelta64[ns]-dtyped array.

    Parameters
    ----------
    data : ndarray or Index
    unit : str, default "ns"
    errors : {"raise", "coerce", "ignore"}, default "raise"

    Returns
    -------
    ndarray[timedelta64[ns]]

    Raises
    ------
    ValueError : data cannot be converted to timedelta64[ns]

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting `errors="ignore"` here will not
    cause errors to be ignored; they are caught and subsequently ignored at
    a higher level.
    """
    # coerce Index to np.ndarray, converting string-dtype if necessary.
    # np.asarray copies only when required; np.array(..., copy=False) would
    # raise under NumPy >= 2 whenever the object-dtype cast needs a copy.
    values = np.asarray(data, dtype=np.object_)

    result = array_to_timedelta64(values,
                                  unit=unit, errors=errors)
    return result.view('timedelta64[ns]')


def _generate_regular_range(start, end, periods, offset):
stride = offset.nanos
if periods is None:
Expand Down
31 changes: 16 additions & 15 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,21 @@ def __new__(cls, data=None,
dayfirst=False, yearfirst=False, dtype=None,
copy=False, name=None, verify_integrity=True):

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if is_scalar(data):
raise TypeError("{cls}() must be called with a "
"collection of some kind, {data} was passed"
.format(cls=cls.__name__, data=repr(data)))

# - Cases checked above all return/raise before reaching here - #

# This allows to later ensure that the 'copy' parameter is honored:
if isinstance(data, Index):
ref_to_data = data._data
Expand All @@ -253,20 +268,8 @@ def __new__(cls, data=None,
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if not isinstance(data, (np.ndarray, Index, ABCSeries,
DatetimeArrayMixin)):
if is_scalar(data):
raise ValueError('DatetimeIndex() must be called with a '
'collection of some kind, %s was passed'
% repr(data))
# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)
Expand Down Expand Up @@ -328,9 +331,7 @@ def __new__(cls, data=None,
cls._validate_frequency(subarr, freq, ambiguous=ambiguous)

if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.freq = to_offset(inferred)
subarr.freq = to_offset(subarr.inferred_freq)

return subarr._deepcopy_if_needed(ref_to_data, copy)

Expand Down
61 changes: 26 additions & 35 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from pandas.core.dtypes.missing import isna

from pandas.core.arrays.timedeltas import (
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8)
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8,
sequence_to_td64ns)
from pandas.core.arrays import datetimelike as dtl

from pandas.core.indexes.base import Index
Expand All @@ -33,10 +34,9 @@
TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op,
wrap_array_method, wrap_field_accessor)
from pandas.core.tools.timedeltas import (
to_timedelta, _coerce_scalar_to_timedelta_type)
_coerce_scalar_to_timedelta_type)
from pandas._libs import (lib, index as libindex,
join as libjoin, Timedelta, NaT)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64


class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin,
Expand Down Expand Up @@ -139,12 +139,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
periods=None, closed=None, dtype=None, copy=False,
name=None, verify_integrity=True):

if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
return data.copy()
else:
return data._shallow_copy()

freq, freq_infer = dtl.maybe_infer_freq(freq)

if data is None:
Expand All @@ -154,32 +148,31 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
result.name = name
return result

if unit is not None:
data = to_timedelta(data, unit=unit, box=False)

if is_scalar(data):
raise ValueError('TimedeltaIndex() must be called with a '
'collection of some kind, {data} was passed'
.format(data=repr(data)))

# convert if not already
if getattr(data, 'dtype', None) != _TD_DTYPE:
data = to_timedelta(data, unit=unit, box=False)
elif copy:
data = np.array(data, copy=True)

data = np.array(data, copy=False)
if data.dtype == np.object_:
data = array_to_timedelta64(data)
if data.dtype != _TD_DTYPE:
if is_timedelta64_dtype(data):
# non-nano unit
# TODO: watch out for overflows
data = data.astype(_TD_DTYPE)
raise TypeError('{cls}() must be called with a '
'collection of some kind, {data} was passed'
.format(cls=cls.__name__, data=repr(data)))

if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
return data.copy()
else:
data = ensure_int64(data).view(_TD_DTYPE)
return data._shallow_copy()

assert data.dtype == 'm8[ns]', data.dtype
# - Cases checked above all return/raise before reaching here - #

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
if inferred_freq is not None:
if freq is not None and freq != inferred_freq:
raise ValueError('Inferred frequency {inferred} from passed '
'values does not conform to passed frequency '
'{passed}'
.format(inferred=inferred_freq,
passed=freq.freqstr))
elif freq_infer:
freq = inferred_freq
freq_infer = False
verify_integrity = False

subarr = cls._simple_new(data, name=name, freq=freq)
# check that we are matching freqs
Expand All @@ -188,9 +181,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
cls._validate_frequency(subarr, freq)

if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.freq = to_offset(inferred)
subarr.freq = to_offset(subarr.inferred_freq)

return subarr

Expand Down
Loading

0 comments on commit 13a3054

Please sign in to comment.