Skip to content

Commit

Permalink
BUG: EA-backed boolean indexers
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Sep 11, 2018
1 parent fe35002 commit e1e6314
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 4 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Expand Up @@ -484,13 +484,15 @@ ExtensionType Changes
- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
- An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`)
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`)
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)

.. _whatsnew_0240.api.incompatibilities:

Expand Down Expand Up @@ -608,6 +610,7 @@ Categorical
^^^^^^^^^^^

- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``.
- Bug when indexing with a boolean-valued ``Categorical``. A boolean-valued ``Categorical`` is now treated as a boolean mask (:issue:`22665`)

Datetimelike
^^^^^^^^^^^^
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/common.py
Expand Up @@ -15,7 +15,9 @@
from pandas import compat
from pandas.compat import iteritems, PY36, OrderedDict
from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.common import (
is_integer, is_bool_dtype, is_extension_array_dtype, is_array_like
)
from pandas.core.dtypes.inference import _iterable_not_string
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
Expand Down Expand Up @@ -100,7 +102,8 @@ def maybe_box_datetimelike(value):


def is_bool_indexer(key):
if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)):
if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or
(is_array_like(key) and is_extension_array_dtype(key.dtype))):
if key.dtype == np.object_:
key = np.asarray(values_from_object(key))

Expand All @@ -110,7 +113,7 @@ def is_bool_indexer(key):
'NA / NaN values')
return False
return True
elif key.dtype == np.bool_:
elif is_bool_dtype(key.dtype):
return True
elif isinstance(key, list):
try:
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/dtypes/common.py
Expand Up @@ -1608,6 +1608,8 @@ def is_bool_dtype(arr_or_dtype):
False
>>> is_bool_dtype(np.array([True, False]))
True
>>> is_bool_dtype(pd.Categorical([True, False]))
True
"""

if arr_or_dtype is None:
Expand All @@ -1618,6 +1620,13 @@ def is_bool_dtype(arr_or_dtype):
# this isn't even a dtype
return False

if isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex)):
arr_or_dtype = arr_or_dtype.dtype

if isinstance(arr_or_dtype, CategoricalDtype):
arr_or_dtype = arr_or_dtype.categories
# now we use the special definition for Index

if isinstance(arr_or_dtype, ABCIndexClass):

# TODO(jreback)
Expand All @@ -1626,6 +1635,9 @@ def is_bool_dtype(arr_or_dtype):
# guess this
return (arr_or_dtype.is_object and
arr_or_dtype.inferred_type == 'boolean')
elif is_extension_array_dtype(arr_or_dtype):
dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype)
return issubclass(dtype.type, np.bool_)

return issubclass(tipo, np.bool_)

Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/arrays/categorical/test_indexing.py
Expand Up @@ -5,7 +5,8 @@
import numpy as np

import pandas.util.testing as tm
from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
from pandas import Categorical, Index, CategoricalIndex, PeriodIndex, Series
from pandas.core.common import is_bool_indexer
from pandas.tests.arrays.categorical.common import TestCategorical


Expand Down Expand Up @@ -121,3 +122,12 @@ def test_get_indexer_non_unique(self, idx_values, key_values, key_class):

tm.assert_numpy_array_equal(expected, result)
tm.assert_numpy_array_equal(exp_miss, res_miss)


def test_mask_with_boolean():
    """Boolean-valued categoricals are accepted as boolean masks.

    Exercises both a bare ``Categorical`` and a ``CategoricalIndex``: each
    must be recognised by ``is_bool_indexer`` and, when used to index a
    ``Series``, must select the same rows as the equivalent object-dtype
    boolean mask.
    """
    s = Series(range(3))
    values = Categorical([True, False, True])

    # Check the raw extension array as well as its Index wrapper, so that
    # both entry points into ``is_bool_indexer`` are covered.
    for idx in (values, CategoricalIndex(values)):
        assert is_bool_indexer(idx)
        result = s[idx]
        expected = s[idx.astype('object')]
        tm.assert_series_equal(result, expected)

0 comments on commit e1e6314

Please sign in to comment.