API: added array (pandas-dev#23581)

Pingviinituutti · Feb 28, 2019 · 89cd33c · 89cd33c
1 parent 3dfcaec
commit 89cd33c
Show file tree

Hide file tree

Showing 15 changed files with 595 additions and 16 deletions.
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -720,6 +720,19 @@ strings and apply several methods to it. These can be accessed like
        Series.dt
        Index.str
 
+
+.. _api.arrays:
+
+Arrays
+------
+
+Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
+
+.. autosummary::
+   :toctree: generated/
+
+   array
+
 .. _api.categorical:
 
 Categorical
@@ -808,6 +821,65 @@ following usable methods and properties:
    Series.cat.as_ordered
    Series.cat.as_unordered
 
+.. _api.arrays.integerna:
+
+Integer-NA
+~~~~~~~~~~
+
+:class:`arrays.IntegerArray` can hold integer data, potentially with missing
+values.
+
+.. autosummary::
+   :toctree: generated/
+
+   arrays.IntegerArray
+
+.. _api.arrays.interval:
+
+Interval
+~~~~~~~~
+
+:class:`IntervalArray` is an array for storing data representing intervals.
+The scalar type is an :class:`Interval`. These may be stored in a :class:`Series`
+or as an :class:`IntervalIndex`. :class:`IntervalArray` can be closed on the
+``'left'``, ``'right'``, ``'both'``, or ``'neither'`` sides.
+See :ref:`indexing.intervallindex` for more.
+
+.. currentmodule:: pandas
+
+.. autosummary::
+   :toctree: generated/
+
+   IntervalArray
+
+.. _api.arrays.period:
+
+Period
+~~~~~~
+
+Periods represent a span of time (e.g. the year 2000, or the hour from 11:00 to 12:00
+on January 1st, 2000). Multiple :class:`Period` objects with a common frequency
+can be stored in a :class:`PeriodArray`. See :ref:`timeseries.periods` for more.
+
+.. autosummary::
+   :toctree: generated/
+
+   arrays.PeriodArray
+
+Sparse
+~~~~~~
+
+Sparse data may be stored and operated on more efficiently when there is a single value
+that's often repeated. :class:`SparseArray` is a container for this type of data.
+See :ref:`sparse` for more.
+
+.. _api.arrays.sparse:
+
+.. autosummary::
+   :toctree: generated/
+
+   SparseArray
+
 Plotting
 ~~~~~~~~
 
@@ -1701,6 +1773,7 @@ IntervalIndex Components
    IntervalIndex.get_indexer
    IntervalIndex.set_closed
    IntervalIndex.overlaps
+   IntervalArray.to_tuples
 
 
 .. _api.multiindex:
@@ -1933,6 +2006,8 @@ Methods
     PeriodIndex.strftime
     PeriodIndex.to_timestamp
 
+.. _api.scalars:
+
 Scalars
 -------
 

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -161,6 +161,41 @@ Reduction and groupby operations such as 'sum' work.
 
    The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
 
+.. _whatsnew_0240.enhancements.array:
+
+A new top-level method :func:`array` has been added for creating 1-dimensional arrays (:issue:`22860`).
+This can be used to create any :ref:`extension array <extending.extension-types>`, including
+extension arrays registered by :ref:`3rd party libraries <ecosystem.extensions>`.
+
+See :ref:`Dtypes <basics.dtypes>` for more on extension arrays.
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan], dtype='Int64')
+   pd.array(['a', 'b', 'c'], dtype='category')
+
+Passing data for which there isn't a dedicated extension type (e.g. float, integer, etc.)
+will return a new :class:`arrays.PandasArray`, which is just a thin (no-copy)
+wrapper around a :class:`numpy.ndarray` that satisfies the extension array interface.
+
+.. ipython:: python
+
+   pd.array([1, 2, 3])
+
+On its own, an :class:`arrays.PandasArray` isn't a very useful object.
+But if you need to write low-level code that works generically for any
+:class:`~pandas.api.extensions.ExtensionArray`, :class:`arrays.PandasArray`
+satisfies that need.
+
+Notice that by default, if no ``dtype`` is specified, the dtype of the returned
+array is inferred from the data. In particular, note that the first example of
+``[1, 2, np.nan]`` would have returned a floating-point array, since ``NaN``
+is a float.
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan])
+
 .. _whatsnew_0240.enhancements.read_html:
 
 ``read_html`` Enhancements

diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py
@@ -3,9 +3,17 @@
 
 See :ref:`extending.extension-types` for more.
 """
-from pandas.core.arrays import PandasArray
+from pandas.core.arrays import (
+    IntervalArray, PeriodArray, Categorical, SparseArray, IntegerArray,
+    PandasArray
+)
 
 
 __all__ = [
-    'PandasArray'
+    'Categorical',
+    'IntegerArray',
+    'IntervalArray',
+    'PandasArray',
+    'PeriodArray',
+    'SparseArray',
 ]
diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -4,9 +4,26 @@
 
 import numpy as np
 
+from pandas.core.arrays import IntervalArray
+from pandas.core.arrays.integer import (
+    Int8Dtype,
+    Int16Dtype,
+    Int32Dtype,
+    Int64Dtype,
+    UInt8Dtype,
+    UInt16Dtype,
+    UInt32Dtype,
+    UInt64Dtype,
+)
 from pandas.core.algorithms import factorize, unique, value_counts
 from pandas.core.dtypes.missing import isna, isnull, notna, notnull
-from pandas.core.arrays import Categorical
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    PeriodDtype,
+    IntervalDtype,
+    DatetimeTZDtype,
+)
+from pandas.core.arrays import Categorical, array
 from pandas.core.groupby import Grouper
 from pandas.io.formats.format import set_eng_float_format
 from pandas.core.index import (Index, CategoricalIndex, Int64Index,

diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -1,3 +1,4 @@
+from .array_ import array  # noqa
 from .base import (ExtensionArray,    # noqa
                    ExtensionOpsMixin,
                    ExtensionScalarOpsMixin)