diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index b4331aab3085f..cca15f26cbf99 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -411,6 +411,7 @@ Other Enhancements - :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`) - :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the ``axis`` parameter (:issue:`8839`) - The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`) +- :meth:`DataFrame.to_records` now accepts ``index_dtypes`` and ``column_dtypes`` parameters to allow different data types in stored column and index records (:issue:`18146`) - :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`) - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method <io.sql.method>` section in the documentation. 
(:issue:`8953`) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 241a1b471f677..b11542622451c 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -398,8 +398,11 @@ def is_dict_like(obj): >>> is_dict_like([1, 2, 3]) False """ + for attr in ("__getitem__", "keys", "__contains__"): + if not hasattr(obj, attr): + return False - return hasattr(obj, '__getitem__') and hasattr(obj, 'keys') + return True def is_named_tuple(obj): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 99ae551d3c55b..99653248216f5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -35,7 +35,6 @@ OrderedDict, PY36, raise_with_traceback, string_and_binary_types) from pandas.compat.numpy import function as nv - from pandas.core.dtypes.cast import ( maybe_upcast, cast_scalar_to_array, @@ -49,6 +48,7 @@ maybe_upcast_putmask, find_common_type) from pandas.core.dtypes.common import ( + is_dict_like, is_object_dtype, is_extension_type, is_extension_array_dtype, @@ -1540,7 +1540,8 @@ def from_records(cls, data, index=None, exclude=None, columns=None, return cls(mgr) - def to_records(self, index=True, convert_datetime64=None): + def to_records(self, index=True, convert_datetime64=None, + column_dtypes=None, index_dtypes=None): """ Convert DataFrame to a NumPy record array. @@ -1557,6 +1558,20 @@ def to_records(self, index=True, convert_datetime64=None): Whether to convert the index to datetime.datetime if it is a DatetimeIndex. + column_dtypes : str, type, dict, default None + .. versionadded:: 0.24.0 + + If a string or type, the data type to store all columns. If + a dictionary, a mapping of column names and indices (zero-indexed) + to specific data types. + index_dtypes : str, type, dict, default None + .. versionadded:: 0.24.0 + + If a string or type, the data type to store all index levels. If + a dictionary, a mapping of index level names and indices + (zero-indexed) to specific data types. 
+ + This mapping is applied only if `index=True`. Returns ------- @@ -1598,6 +1613,23 @@ def to_records(self, index=True, convert_datetime64=None): >>> df.to_records(index=False) rec.array([(1, 0.5 ), (2, 0.75)], dtype=[('A', '<i8'), ('B', '<f8')]) + + Data types can be specified for the columns: + + >>> df.to_records(column_dtypes={"A": "int32"}) + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('I', 'O'), ('A', '<i4'), ('B', '<f8')]) + + As well as for the index: + + >>> df.to_records(index_dtypes="<S2") + rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], + dtype=[('I', 'S2'), ('A', '<i8'), ('B', '<f8')]) + + >>> index_dtypes = "<S{}".format(df.index.str.len().max()) + >>> df.to_records(index_dtypes=index_dtypes) + rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], + dtype=[('I', 'S1'), ('A', ' default to array dtypes. + (dict(), + np.rec.array([(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[("index", "