From 9e0d87d71dd3fafdd2fb4d30c3ea4cdb52e1849a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 7 Dec 2018 07:18:58 -0600 Subject: [PATCH] update docs, cleanup --- doc/source/whatsnew/v0.24.0.rst | 1 - pandas/core/arrays/base.py | 3 ++- pandas/core/arrays/period.py | 5 ----- pandas/core/dtypes/base.py | 5 ----- pandas/core/indexes/category.py | 9 ++++++++- pandas/core/internals/blocks.py | 3 +-- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 1f906247e6d59..deabc8949126c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -994,7 +994,6 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`). - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) -- Added :meth:`pandas.api.extensions.ExtensionArray.where` (:issue:`24077`) - Bug when concatenating multiple ``Series`` with different extension dtypes not casting to object dtype (:issue:`22994`) - Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`) - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6e1e35bb4fb02..da2125749434f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -64,7 +64,6 @@ class ExtensionArray(object): * unique * factorize / _values_for_factorize * argsort / _values_for_argsort - * where The remaining methods implemented 
on this class should be performant, as they only compose abstract methods. Still, a more efficient @@ -221,6 +220,8 @@ def __setitem__(self, key, value): # example, a string like '2018-01-01' is coerced to a datetime # when setting on a datetime64ns array. In general, if the # __init__ method coerces that value, then so should __setitem__ + # Note, also, that Series/DataFrame.where internally use __setitem__ + # on a copy of the data. raise NotImplementedError(_not_implemented_message.format( type(self), '__setitem__') ) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c1f9d3b946171..d9dde1c699761 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -241,11 +241,6 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - def _check_compatible_with(self, other): - if self.freqstr != other.freqstr: - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - # -------------------------------------------------------------------- # Data / Attributes diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index d453a909fc42e..ab1cb9cf2499a 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -26,11 +26,6 @@ class _DtypeOpsMixin(object): na_value = np.nan _metadata = () - @property - def _ndarray_na_value(self): - """Private method internal to pandas""" - raise AbstractMethodError(self) - def __eq__(self, other): """Check whether 'other' is equal to self. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 94f932d5e8123..bebde5f779dc7 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -501,7 +501,14 @@ def _can_reindex(self, indexer): @Appender(_index_shared_docs['where']) def where(self, cond, other=None): - cat = self.values.where(cond, other=other) + # TODO: Investigate an alternative implementation with + # 1. 
copy the underlying Categorical + # 2. setitem with `cond` and `other` + # 3. Rebuild CategoricalIndex. + if other is None: + other = self._na_value + values = np.where(cond, self.values, other) + cat = Categorical(values, dtype=self.dtype) return self._shallow_copy(cat, **self._get_attributes_dict()) def reindex(self, target, method=None, level=None, limit=None, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5f435a0455657..2fa9b47eb6277 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1970,7 +1970,6 @@ def shift(self, periods, axis=0): def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False): - # rough attempt to see if if isinstance(other, (ABCIndexClass, ABCSeries)): other = other.array @@ -2004,8 +2003,8 @@ def where(self, other, cond, align=True, errors='raise', else: dtype = self.dtype + # rough heuristic to see if the other array implements setitem if self._holder.__setitem__ is ExtensionArray.__setitem__: - # the array doesn't implement setitem, so convert to ndarray result = self._holder._from_sequence( np.where(cond, self.values, other), dtype=dtype,