Skip to content

Commit

Permalink
Simplify
Browse files Browse the repository at this point in the history
Upcasting is still broken
  • Loading branch information
TomAugspurger committed Apr 25, 2018
1 parent c721915 commit 125ca0b
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 77 deletions.
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
# TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs
# dispatch to internal type takes
if is_extension_array_dtype(arr):
return arr.take(indexer, fill_value=fill_value)
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
elif is_datetimetz(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
elif is_interval_dtype(arr):
Expand Down
119 changes: 56 additions & 63 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,71 +5,14 @@
This is an experimental API and subject to breaking changes
without warning.
"""
import textwrap

import numpy as np

from pandas.errors import AbstractMethodError
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution

_not_implemented_message = "{} does not implement {}."


_take_docstring = textwrap.dedent("""\
Take elements from an array.
Parameters
----------
%(arr)s\
indexer : sequence of integers
Indices to be taken. See Notes for how negative indicies
are handled.
fill_value : any, optional
Fill value to use for NA-indicies. This has a few behaviors.
* fill_value is not specified : triggers NumPy's semantics
where negative values in `indexer` mean slices from the end.
* fill_value is NA : Fill positions where `indexer` is ``-1``
with ``self.dtype.na_value``. Anything considered NA by
:func:`pandas.isna` will result in ``self.dtype.na_value``
being used to fill.
* fill_value is not NA : Fill positions where `indexer` is ``-1``
with `fill_value`.
Returns
-------
ExtensionArray
Raises
------
IndexError
When the indexer is out of bounds for the array.
ValueError
When the indexer contains negative values other than ``-1``
and `fill_value` is specified.
Notes
-----
The meaning of negative values in `indexer` depends on the
`fill_value` argument. By default, we follow the behavior
:meth:`numpy.take` of where negative indices indicate slices
from the end.
When `fill_value` is specified, we follow pandas semantics of ``-1``
indicating a missing value. In this case, positions where `indexer`
is ``-1`` will be filled with `fill_value` or the default NA value
for this type.
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
``iloc``, when the indexer is a sequence of values. Additionally,
it's called by :meth:`Series.reindex` with a `fill_value`.
See Also
--------
numpy.take""")


class ExtensionArray(object):
"""Abstract base class for custom 1-D array types.
Expand Down Expand Up @@ -532,15 +475,66 @@ def _values_for_take(self):
"""
return self.astype(object)

@Substitution(arr='')
@Appender(_take_docstring)
def take(self, indexer, fill_value=None, allow_fill=None):
# type: (Sequence[int], Optional[Any]) -> ExtensionArray
# assert fill_value is not np.nan
# type: (Sequence[int], Optional[Any], Optional[bool]) -> ExtensionArray
"""Take elements from an array.
Parameters
----------
indexer : sequence of integers
Indices to be taken. See Notes for how negative indicies
are handled.
fill_value : any, optional
Fill value to use for NA-indicies. This has a few behaviors.
* fill_value is not specified : triggers NumPy's semantics
where negative values in `indexer` mean slices from the end.
* fill_value is NA : Fill positions where `indexer` is ``-1``
with ``self.dtype.na_value``. Anything considered NA by
:func:`pandas.isna` will result in ``self.dtype.na_value``
being used to fill.
* fill_value is not NA : Fill positions where `indexer` is ``-1``
with `fill_value`.
Returns
-------
ExtensionArray
Raises
------
IndexError
When the indexer is out of bounds for the array.
ValueError
When the indexer contains negative values other than ``-1``
and `fill_value` is specified.
Notes
-----
The meaning of negative values in `indexer` depends on the
`fill_value` argument. By default, we follow the behavior
:meth:`numpy.take` of where negative indices indicate slices
from the end.
When `fill_value` is specified, we follow pandas semantics of ``-1``
indicating a missing value. In this case, positions where `indexer`
is ``-1`` will be filled with `fill_value` or the default NA value
for this type.
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
``iloc``, when the indexer is a sequence of values. Additionally,
it's called by :meth:`Series.reindex` with a `fill_value`.
See Also
--------
numpy.take
"""
from pandas.core.algorithms import take

data = self._values_for_take()
result = take(data, indexer, fill_value=fill_value)
if allow_fill and fill_value is None:
fill_value = self.dtype.na_value

result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
return self._from_sequence(result)

def copy(self, deep=False):
Expand Down Expand Up @@ -605,4 +599,3 @@ def _ndarray_values(self):
used for interacting with our indexers.
"""
return np.array(self)

6 changes: 5 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,11 @@ def changeit():

def maybe_promote(dtype, fill_value=np.nan):
# if we passed an array here, determine the fill value by dtype
if isinstance(fill_value, np.ndarray):
if is_extension_array_dtype(dtype):
# XXX: verify this change
fill_value = dtype.na_value

elif isinstance(fill_value, np.ndarray):
if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
fill_value = iNaT
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3476,7 +3476,7 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=np.nan,
allow_dups=False)

def _reindex_columns(self, new_columns, method, copy, level,
fill_value=np.nan, limit=None, tolerance=None):
fill_value=None, limit=None, tolerance=None):
new_columns, indexer = self.columns.reindex(new_columns, method=method,
level=level, limit=limit,
tolerance=tolerance)
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3660,7 +3660,7 @@ def reindex(self, *args, **kwargs):
copy = kwargs.pop('copy', True)
limit = kwargs.pop('limit', None)
tolerance = kwargs.pop('tolerance', None)
fill_value = kwargs.pop('fill_value', np.nan)
fill_value = kwargs.pop('fill_value', None)

# Series.reindex doesn't use / need the axis kwarg
# We pop and ignore it here, to make writing Series/Frame generic code
Expand Down Expand Up @@ -3776,7 +3776,7 @@ def _reindex_multi(self, axes, copy, fill_value):

@Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
limit=None, fill_value=np.nan):
limit=None, fill_value=None):
msg = ("'.reindex_axis' is deprecated and will be removed in a future "
"version. Use '.reindex' instead.")
self._consolidate_inplace()
Expand All @@ -3790,7 +3790,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
return self._reindex_with_indexers({axis: [new_index, indexer]},
fill_value=fill_value, copy=copy)

def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False,
def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
allow_dups=False):
"""allow_dups indicates an internal call here """

Expand Down Expand Up @@ -7252,7 +7252,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
raise TypeError('unsupported type: %s' % type(other))

def _align_frame(self, other, join='outer', axis=None, level=None,
copy=True, fill_value=np.nan, method=None, limit=None,
copy=True, fill_value=None, method=None, limit=None,
fill_axis=0):
# defaults
join_index, join_columns = None, None
Expand Down Expand Up @@ -7420,6 +7420,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
if other.ndim <= self.ndim:

_, other = self.align(other, join='left', axis=axis,
# XXX
level=level, fill_value=np.nan)

# if we are NOT aligned, raise as we cannot where index
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,11 @@ def _holder(self):
# For extension blocks, the holder is values-dependent.
return type(self.values)

@property
def fill_value(self):
# Used in reindex_indexer
return self.values.dtype.na_value

@property
def _can_hold_na(self):
# The default ExtensionArray._can_hold_na is True
Expand Down Expand Up @@ -1951,7 +1956,8 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
# axis doesn't matter; we are really a single-dim object
# but are passed the axis depending on the calling routing
# if its REALLY axis 0, then this will be a reindex and not a take
new_values = self.values.take(indexer, fill_value=fill_value)
new_values = self.values.take(indexer, fill_value=fill_value,
allow_fill=True)

# if we are a 1-dim object, then always place at 0
if self.ndim == 1:
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2185,7 +2185,7 @@ def _binop(self, other, func, level=None, fill_value=None):
result.name = None
return result

def combine(self, other, func, fill_value=np.nan):
def combine(self, other, func, fill_value=None):
"""
Perform elementwise binary operation on two Series using given function
with optional fill value when an index is missing from one Series or
Expand Down Expand Up @@ -3216,7 +3216,10 @@ def _reindex_indexer(self, new_index, indexer, copy):
return self.copy()
return self

new_values = algorithms.take_1d(self._values, indexer)
# TODO: determine if we want EA to handle fill_value=None
# if not, then we have to determine this here.
new_values = algorithms.take_1d(self._values, indexer,
fill_value=None, allow_fill=True)
return self._constructor(new_values, index=new_index)

def _needs_reindex_multi(self, axes, method, level):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def test_take_non_na_fill_value(self, data_missing):

def test_take_pandas_style_negative_raises(self, data, na_value):
with pytest.raises(ValueError):
data.take([0, -2], fill_value=na_value)
data.take([0, -2], fill_value=na_value, allow_fill=True)

@pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
def test_take_series(self, data):
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class DecimalArray(ExtensionArray):
dtype = DecimalDtype()

def __init__(self, values):
assert all(isinstance(v, decimal.Decimal) for v in values)
values = np.asarray(values, dtype=object)

self._data = values
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def test_take_basic(self):
decimal.Decimal('3')])
self.assert_extension_array_equal(result, expected)

result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value)
result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value,
allow_fill=True)
expected = DecimalArray([decimal.Decimal('2'),
decimal.Decimal('3'),
decimal.Decimal('NaN')])
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,22 @@ def nbytes(self):
def isna(self):
return np.array([x == self.dtype.na_value for x in self.data])

def take(self, indexer, fill_value=None):
def take(self, indexer, fill_value=None, allow_fill=None):
# re-implement here, since NumPy has trouble setting
# sized objects like UserDicts into scalar slots of
# an ndarary.
indexer = np.asarray(indexer)
msg = ("Index is out of bounds or cannot do a "
"non-empty take from an empty array.")

if fill_value is None:
if allow_fill is None:
try:
output = [self.data[loc] for loc in indexer]
except IndexError:
raise IndexError(msg)
else:
if fill_value is None:
fill_value = self.dtype.na_value
# bounds check
if (indexer < -1).any():
raise ValueError
Expand Down

0 comments on commit 125ca0b

Please sign in to comment.