diff --git a/doc/source/api.rst b/doc/source/api.rst index 2e9acfc1e09ed..d4f1f5c0cdaf4 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -720,6 +720,19 @@ strings and apply several methods to it. These can be accessed like Series.dt Index.str + +.. _api.arrays: + +Arrays +------ + +Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). + +.. autosummary:: + :toctree: generated/ + + array + .. _api.categorical: Categorical @@ -808,6 +821,65 @@ following usable methods and properties: Series.cat.as_ordered Series.cat.as_unordered +.. _api.arrays.integerna: + +Integer-NA +~~~~~~~~~~ + +:class:`arrays.IntegerArray` can hold integer data, potentially with missing +values. + +.. autosummary:: + :toctree: generated/ + + arrays.IntegerArray + +.. _api.arrays.interval: + +Interval +~~~~~~~~ + +:class:`IntervalArray` is an array for storing data representing intervals. +The scalar type is an :class:`Interval`. These may be stored in a :class:`Series` +or as an :class:`IntervalIndex`. :class:`IntervalArray` can be closed on the +``'left'``, ``'right'``, ``'both'``, or ``'neither'`` sides. +See :ref:`indexing.intervallindex` for more. + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: generated/ + + IntervalArray + +.. _api.arrays.period: + +Period +~~~~~~ + +Periods represent a span of time (e.g. the year 2000, or the hour from 11:00 to 12:00 +on January 1st, 2000). A collection of :class:`Period` objects with a common frequency +can be stored in a :class:`PeriodArray`. See :ref:`timeseries.periods` for more. + +.. autosummary:: + :toctree: generated/ + + arrays.PeriodArray + +Sparse +~~~~~~ + +Sparse data may be stored and operated on more efficiently when there is a single value +that's often repeated. :class:`SparseArray` is a container for this type of data. +See :ref:`sparse` for more. + +.. _api.arrays.sparse: + +.. 
autosummary:: + :toctree: generated/ + + SparseArray + Plotting ~~~~~~~~ @@ -1701,6 +1773,7 @@ IntervalIndex Components IntervalIndex.get_indexer IntervalIndex.set_closed IntervalIndex.overlaps + IntervalArray.to_tuples .. _api.multiindex: @@ -1933,6 +2006,8 @@ Methods PeriodIndex.strftime PeriodIndex.to_timestamp +.. _api.scalars: + Scalars ------- diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3e8e1b2168a7c..9209536ff13f9 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -161,6 +161,41 @@ Reduction and groupby operations such as 'sum' work. The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date. +.. _whatsnew_0240.enhancements.array: + +A new top-level method :func:`array` has been added for creating 1-dimensional arrays (:issue:`22860`). +This can be used to create any :ref:`extension array <extending.extension-types>`, including +extension arrays registered by :ref:`3rd party libraries <ecosystem.extensions>`. + +See :ref:`Dtypes <basics.dtypes>` for more on extension arrays. + +.. ipython:: python + + pd.array([1, 2, np.nan], dtype='Int64') + pd.array(['a', 'b', 'c'], dtype='category') + +Passing data for which there isn't a dedicated extension type (e.g. float, integer, etc.) +will return a new :class:`arrays.PandasArray`, which is just a thin (no-copy) +wrapper around a :class:`numpy.ndarray` that satisfies the extension array interface. + +.. ipython:: python + + pd.array([1, 2, 3]) + +On its own, a :class:`arrays.PandasArray` isn't a very useful object. +But if you need to write low-level code that works generically for any +:class:`~pandas.api.extensions.ExtensionArray`, :class:`arrays.PandasArray` +satisfies that need. + +Notice that by default, if no ``dtype`` is specified, the dtype of the returned +array is inferred from the data. 
In particular, note that the first example of +``[1, 2, np.nan]`` would have returned a floating-point array, since ``NaN`` +is a float. + +.. ipython:: python + + pd.array([1, 2, np.nan]) + .. _whatsnew_0240.enhancements.read_html: ``read_html`` Enhancements diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py index f6a528bc87b54..1a7d5821be0cb 100644 --- a/pandas/arrays/__init__.py +++ b/pandas/arrays/__init__.py @@ -3,9 +3,17 @@ See :ref:`extending.extension-types` for more. """ -from pandas.core.arrays import PandasArray +from pandas.core.arrays import ( + IntervalArray, PeriodArray, Categorical, SparseArray, IntegerArray, + PandasArray +) __all__ = [ - 'PandasArray' + 'Categorical', + 'IntegerArray', + 'IntervalArray', + 'PandasArray', + 'PeriodArray', + 'SparseArray', ] diff --git a/pandas/core/api.py b/pandas/core/api.py index ad35b647ac458..afc929c39086c 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -4,9 +4,26 @@ import numpy as np +from pandas.core.arrays import IntervalArray +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) from pandas.core.algorithms import factorize, unique, value_counts from pandas.core.dtypes.missing import isna, isnull, notna, notnull -from pandas.core.arrays import Categorical +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + PeriodDtype, + IntervalDtype, + DatetimeTZDtype, +) +from pandas.core.arrays import Categorical, array from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index c317786e7d633..d6a61a26a954f 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,3 +1,4 @@ +from .array_ import array # noqa from .base import (ExtensionArray, # noqa 
ExtensionOpsMixin, ExtensionScalarOpsMixin) diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py new file mode 100644 index 0000000000000..173ed7d191ac9 --- /dev/null +++ b/pandas/core/arrays/array_.py @@ -0,0 +1,227 @@ +from pandas._libs import lib, tslibs + +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import registry + +from pandas import compat + + +def array(data, # type: Sequence[object] + dtype=None, # type: Optional[Union[str, np.dtype, ExtensionDtype]] + copy=True, # type: bool + ): + # type: (...) -> ExtensionArray + """ + Create an array. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + data : Sequence of objects + The scalars inside `data` should be instances of the + scalar type for `dtype`. It's expected that `data` + represents a 1-dimensional array of data. + + When `data` is an Index or Series, the underlying array + will be extracted from `data`. + + dtype : str, np.dtype, or ExtensionDtype, optional + The dtype to use for the array. This may be a NumPy + dtype or an extension type registered with pandas using + :meth:`pandas.api.extensions.register_extension_dtype`. + + If not specified, there are two possibilities: + + 1. When `data` is a :class:`Series`, :class:`Index`, or + :class:`ExtensionArray`, the `dtype` will be taken + from the data. + 2. Otherwise, pandas will attempt to infer the `dtype` + from the data. + + Note that when `data` is a NumPy array, ``data.dtype`` is + *not* used for inferring the array type. This is because + NumPy cannot represent all the types of data that can be + held in extension arrays. 
+ + Currently, pandas will infer an extension dtype for sequences of + + ========================== ================================== + scalar type Array Type + ========================== ================================== + * :class:`pandas.Interval` :class:`pandas.IntervalArray` + * :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` + ========================== ================================== + + For all other cases, NumPy's usual inference rules will be used. + + copy : bool, default True + Whether to copy the data, even if not necessary. Depending + on the type of `data`, creating the new array may require + copying data, even if ``copy=False``. + + Returns + ------- + array : ExtensionArray + + Raises + ------ + ValueError + When `data` is not 1-dimensional. + + See Also + -------- + numpy.array : Construct a NumPy array. + arrays.PandasArray : ExtensionArray wrapping a NumPy array. + Series : Construct a pandas Series. + Index : Construct a pandas Index. + + Notes + ----- + Omitting the `dtype` argument means pandas will attempt to infer the + best array type from the values in the data. As new array types are + added by pandas and 3rd party libraries, the "best" array type may + change. We recommend specifying `dtype` to ensure that + + 1. the correct array type for the data is returned + 2. the returned array type doesn't change as new extension types + are added by pandas and third-party libraries + + Additionally, if the underlying memory representation of the returned + array matters, we recommend specifying the `dtype` as a concrete object + rather than a string alias or allowing it to be inferred. For example, + a future version of pandas or a 3rd-party library may include a + dedicated ExtensionArray for string data. In this event, the following + would no longer return a :class:`arrays.PandasArray` backed by a NumPy + array. 
+ + >>> pd.array(['a', 'b'], dtype=str) + <PandasArray> + ['a', 'b'] + Length: 2, dtype: str32 + + This would instead return the new ExtensionArray dedicated for string + data. If you really need the new array to be backed by a NumPy array, + specify that in the dtype. + + >>> pd.array(['a', 'b'], dtype=np.dtype("<U1")) + <PandasArray> + ['a', 'b'] + Length: 2, dtype: str32 + + Or use the dedicated constructor for the array you're expecting, and + wrap that in a PandasArray + + >>> pd.array(np.array(['a', 'b'], dtype='<U1')) + <PandasArray> + ['a', 'b'] + Length: 2, dtype: str32 + + Examples + -------- + If a dtype is not specified, `data` is passed through to + :meth:`numpy.array`, and a :class:`arrays.PandasArray` is returned. + + >>> pd.array([1, 2]) + <PandasArray> + [1, 2] + Length: 2, dtype: int64 + + Or the NumPy dtype can be specified + + >>> pd.array([1, 2], dtype=np.dtype("int32")) + <PandasArray> + [1, 2] + Length: 2, dtype: int32 + + You can use the string alias for `dtype` + + >>> pd.array(['a', 'b', 'a'], dtype='category') + [a, b, a] + Categories (2, object): [a, b] + + Or specify the actual dtype + + >>> pd.array(['a', 'b', 'a'], + ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) + [a, b, a] + Categories (3, object): [a < b < c] + + Because omitting the `dtype` passes the data through to NumPy, + a mixture of valid integers and NA will return a floating-point + NumPy array. + + >>> pd.array([1, 2, np.nan]) + <PandasArray> + [1.0, 2.0, nan] + Length: 3, dtype: float64 + + To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify + the dtype: + + >>> pd.array([1, 2, np.nan], dtype='Int64') + <IntegerArray> + [1, 2, NaN] + Length: 3, dtype: Int64 + + Pandas will infer an ExtensionArray for some types of data: + + >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) + <PeriodArray> + ['2000-01-01', '2000-01-01'] + Length: 2, dtype: period[D] + + `data` must be 1-dimensional. A ValueError is raised when the input + has the wrong dimensionality. + + >>> pd.array(1) + Traceback (most recent call last): + ... 
+ ValueError: Cannot pass scalar '1' to 'pandas.array'. + """ + from pandas.core.arrays import ( + period_array, ExtensionArray, IntervalArray, PandasArray + ) + from pandas.core.internals.arrays import extract_array + + if lib.is_scalar(data): + msg = ( + "Cannot pass scalar '{}' to 'pandas.array'." + ) + raise ValueError(msg.format(data)) + + data = extract_array(data, extract_numpy=True) + + if dtype is None and isinstance(data, ExtensionArray): + dtype = data.dtype + + # this returns None for not-found dtypes. + if isinstance(dtype, compat.string_types): + dtype = registry.find(dtype) or dtype + + if is_extension_array_dtype(dtype): + cls = dtype.construct_array_type() + return cls._from_sequence(data, dtype=dtype, copy=copy) + + if dtype is None: + inferred_dtype = lib.infer_dtype(data) + if inferred_dtype == 'period': + try: + return period_array(data, copy=copy) + except tslibs.IncompatibleFrequency: + # We may have a mixture of frequencies. + # We choose to return an ndarray, rather than raising. + pass + elif inferred_dtype == 'interval': + try: + return IntervalArray(data, copy=copy) + except ValueError: + # We may have a mixture of `closed` here. + # We choose to return an ndarray, rather than raising. 
+ pass + + # TODO(DatetimeArray): handle this type + # TODO(BooleanArray): handle this type + + result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) + return result diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 1f74a2cb143c8..27e89406ec2d6 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -82,7 +82,9 @@ from_arrays from_tuples from_breaks +overlaps set_closed +to_tuples %(extra_methods)s\ See Also diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d45cd9402d45b..2c7ee5b277a90 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -115,6 +115,11 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, copy : bool, default False Whether to copy the ordinals before storing. + See Also + -------- + period_array : Create a new PeriodArray. + pandas.PeriodIndex : Immutable Index for period data. + Notes ----- There are two components to a PeriodArray @@ -127,11 +132,6 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, The `freq` indicates the span covered by each element of the array. All elements in the PeriodArray have the same `freq`. - - See Also - -------- - period_array : Create a new PeriodArray. - pandas.PeriodIndex : Immutable Index for period data. """ # array priority higher than numpy scalars __array_priority__ = 1000 diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 24407850d44a8..e0d0cf3393dd5 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -40,7 +40,12 @@ class Registry(object): Registry for dtype inference The registry allows one to map a string repr of a extension - dtype to an extenstion dtype. + dtype to an extension dtype. The string alias can be used in several + places, including + + * Series and Index constructors + * :meth:`pandas.array` + * :meth:`pandas.Series.astype` Multiple extension types can be registered. These are tried in order. 
@@ -623,6 +628,7 @@ def __setstate__(self, state): self._unit = state['unit'] +@register_extension_dtype class PeriodDtype(ExtensionDtype, PandasExtensionDtype): """ A Period duck-typed class, suitable for holding a period with freq dtype. @@ -886,4 +892,3 @@ def is_dtype(cls, dtype): _pandas_registry = Registry() _pandas_registry.register(DatetimeTZDtype) -_pandas_registry.register(PeriodDtype) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b733ce806981a..07cf358c765b3 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -45,7 +45,13 @@ class TestPDApi(Base): 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', 'SparseDtype', 'SparseSeries', 'Timedelta', - 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] + 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex', + 'IntervalArray', + 'CategoricalDtype', 'PeriodDtype', 'IntervalDtype', + 'DatetimeTZDtype', + 'Int8Dtype', 'Int16Dtype', 'Int32Dtype', 'Int64Dtype', + 'UInt8Dtype', 'UInt16Dtype', 'UInt32Dtype', 'UInt64Dtype', + ] # these are already deprecated; awaiting removal deprecated_classes = ['TimeGrouper'] @@ -57,7 +63,7 @@ class TestPDApi(Base): modules = ['np', 'datetime'] # top-level functions - funcs = ['bdate_range', 'concat', 'crosstab', 'cut', + funcs = ['array', 'bdate_range', 'concat', 'crosstab', 'cut', 'date_range', 'interval_range', 'eval', 'factorize', 'get_dummies', 'infer_freq', 'isna', 'isnull', 'lreshape', diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py new file mode 100644 index 0000000000000..76ef85b0317ad --- /dev/null +++ b/pandas/tests/arrays/test_array.py @@ -0,0 +1,181 @@ +import decimal + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import registry + +import pandas as pd +from pandas.api.extensions import register_extension_dtype +from pandas.core.arrays import PandasArray, integer_array, period_array +from 
pandas.tests.extension.decimal import ( + DecimalArray, DecimalDtype, to_decimal) +import pandas.util.testing as tm + + +@pytest.mark.parametrize("data, dtype, expected", [ + # Basic NumPy defaults. + ([1, 2], None, PandasArray(np.array([1, 2]))), + ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))), + ([1, 2], np.dtype('float32'), + PandasArray(np.array([1., 2.0], dtype=np.dtype('float32')))), + (np.array([1, 2]), None, PandasArray(np.array([1, 2]))), + + # String alias passes through to NumPy + ([1, 2], 'float32', PandasArray(np.array([1, 2], dtype='float32'))), + + # Period alias + ([pd.Period('2000', 'D'), pd.Period('2001', 'D')], 'Period[D]', + period_array(['2000', '2001'], freq='D')), + + # Period dtype + ([pd.Period('2000', 'D')], pd.PeriodDtype('D'), + period_array(['2000'], freq='D')), + + # Datetime (naive) + ([1, 2], np.dtype('datetime64[ns]'), + PandasArray(np.array([1, 2], dtype='datetime64[ns]'))), + # TODO(DatetimeArray): add here + + # Category + (['a', 'b'], 'category', pd.Categorical(['a', 'b'])), + (['a', 'b'], pd.CategoricalDtype(None, ordered=True), + pd.Categorical(['a', 'b'], ordered=True)), + + # Interval + ([pd.Interval(1, 2), pd.Interval(3, 4)], 'interval', + pd.IntervalArray.from_tuples([(1, 2), (3, 4)])), + + # Sparse + ([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')), + + # IntegerNA + ([1, None], 'Int16', integer_array([1, None], dtype='Int16')), + (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + + # Index + (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + + # Series[EA] returns the EA + (pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])), + None, + pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])), + + # "3rd party" EAs work + ([decimal.Decimal(0), decimal.Decimal(1)], 'decimal', to_decimal([0, 1])), + + # pass an ExtensionArray, but a different dtype + (period_array(['2000', '2001'], freq='D'), + 'category', + 
pd.Categorical([pd.Period('2000', 'D'), pd.Period('2001', 'D')])), +]) +def test_array(data, dtype, expected): + result = pd.array(data, dtype=dtype) + tm.assert_equal(result, expected) + + +def test_array_copy(): + a = np.array([1, 2]) + # default is to copy + b = pd.array(a) + assert np.shares_memory(a, b._ndarray) is False + + # copy=True + b = pd.array(a, copy=True) + assert np.shares_memory(a, b._ndarray) is False + + # copy=False + b = pd.array(a, copy=False) + assert np.shares_memory(a, b._ndarray) is True + + +@pytest.mark.parametrize('data, expected', [ + ([pd.Period("2000", "D"), pd.Period("2001", "D")], + period_array(["2000", "2001"], freq="D")), + ([pd.Interval(0, 1), pd.Interval(1, 2)], + pd.IntervalArray.from_breaks([0, 1, 2])), +]) +def test_array_inference(data, expected): + result = pd.array(data) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize('data', [ + # mix of frequencies + [pd.Period("2000", "D"), pd.Period("2001", "A")], + # mix of closed + [pd.Interval(0, 1, closed='left'), pd.Interval(1, 2, closed='right')], +]) +def test_array_inference_fails(data): + result = pd.array(data) + expected = PandasArray(np.array(data, dtype=object)) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("data", [ + np.array([[1, 2], [3, 4]]), + [[1, 2], [3, 4]], +]) +def test_nd_raises(data): + with pytest.raises(ValueError, match='PandasArray must be 1-dimensional'): + pd.array(data) + + +def test_scalar_raises(): + with pytest.raises(ValueError, + match="Cannot pass scalar '1'"): + pd.array(1) + +# --------------------------------------------------------------------------- +# A couple dummy classes to ensure that Series and Indexes are unboxed before +# getting to the EA classes. 
+ + +@register_extension_dtype +class DecimalDtype2(DecimalDtype): + name = 'decimal2' + + @classmethod + def construct_array_type(cls): + return DecimalArray2 + + +class DecimalArray2(DecimalArray): + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + if isinstance(scalars, (pd.Series, pd.Index)): + raise TypeError + + return super(DecimalArray2, cls)._from_sequence( + scalars, dtype=dtype, copy=copy + ) + + +@pytest.mark.parametrize("box", [pd.Series, pd.Index]) +def test_array_unboxes(box): + data = box([decimal.Decimal('1'), decimal.Decimal('2')]) + # make sure it works + with pytest.raises(TypeError): + DecimalArray2._from_sequence(data) + + result = pd.array(data, dtype='decimal2') + expected = DecimalArray2._from_sequence(data.values) + tm.assert_equal(result, expected) + + +@pytest.fixture +def registry_without_decimal(): + idx = registry.dtypes.index(DecimalDtype) + registry.dtypes.pop(idx) + yield + registry.dtypes.append(DecimalDtype) + + +def test_array_not_registered(registry_without_decimal): + # check we aren't on it + assert registry.find('decimal') is None + data = [decimal.Decimal('1'), decimal.Decimal('2')] + + result = pd.array(data, dtype=DecimalDtype) + expected = DecimalArray._from_sequence(data) + tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 7fb88640e5fb4..82025cd972e6b 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -4,15 +4,23 @@ from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.dtypes import PeriodDtype, registry import pandas as pd from pandas.core.arrays import PeriodArray, period_array import pandas.util.testing as tm # ---------------------------------------------------------------------------- -# Constructors +# Dtype + +def test_registered(): + assert PeriodDtype in 
registry.dtypes + result = registry.find("Period[D]") + expected = PeriodDtype("D") + assert result == expected + +# ---------------------------------------------------------------------------- # period_array diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 81d08ac71bf6d..77dc04e9453a9 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -798,13 +798,13 @@ def test_update_dtype_errors(self, bad_dtype): @pytest.mark.parametrize('dtype', [ CategoricalDtype, IntervalDtype, + PeriodDtype, ]) def test_registry(dtype): assert dtype in registry.dtypes @pytest.mark.parametrize('dtype', [ - PeriodDtype, DatetimeTZDtype, ]) def test_pandas_registry(dtype): @@ -817,6 +817,7 @@ def test_pandas_registry(dtype): ('interval', IntervalDtype()), ('interval[int64]', IntervalDtype()), ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), + ('period[D]', PeriodDtype('D')), ('category', CategoricalDtype()), ]) def test_registry_find(dtype, expected): @@ -824,7 +825,6 @@ def test_registry_find(dtype, expected): @pytest.mark.parametrize('dtype, expected', [ - ('period[D]', PeriodDtype('D')), ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')), ]) def test_pandas_registry_find(dtype, expected): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 3b966cd8d4774..9c719b1304629 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -1,3 +1,4 @@ +import numpy as np import pytest import pandas as pd @@ -55,3 +56,14 @@ def test_from_dtype(self, data): result = pd.Series(list(data), dtype=str(dtype)) self.assert_series_equal(result, expected) + + def test_pandas_array(self, data): + # pd.array(extension_array) should be idempotent... + result = pd.array(data) + self.assert_extension_array_equal(result, data) + + def test_pandas_array_dtype(self, data): + # ... 
but specifying dtype will override idempotency + result = pd.array(data, dtype=np.dtype(object)) + expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) + self.assert_equal(result, expected) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 79e81f1034c6d..05671bdf13318 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -8,9 +8,11 @@ from pandas.core.dtypes.base import ExtensionDtype import pandas as pd +from pandas.api.extensions import register_extension_dtype from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin +@register_extension_dtype class DecimalDtype(ExtensionDtype): type = decimal.Decimal name = 'decimal'