Skip to content

Commit

Permalink
CLN: replace _interleaved_dtype with _find_common_type
Browse files Browse the repository at this point in the history
xref pandas-dev#15736   xref pandas-dev#12780

Author: Jeff Reback <jeff@reback.net>

Closes pandas-dev#15765 from jreback/common_types and squashes the following commits:

d472646 [Jeff Reback] try removing restriction on windows
8d07cae [Jeff Reback] CLN: replace _interleaved_dtype with _find_common_type
  • Loading branch information
jreback authored and AnkurDedania committed Mar 21, 2017
1 parent 0fa874b commit fb72afc
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 66 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ Bug Fixes


- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`)
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`)

- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)

Expand Down
59 changes: 9 additions & 50 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

from pandas.core.base import PandasObject

from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype,
CategoricalDtype)
from pandas.types.common import (_TD_DTYPE, _NS_DTYPE,
_ensure_int64, _ensure_platform_int,
is_integer,
Expand Down Expand Up @@ -4496,55 +4497,13 @@ def _interleaved_dtype(blocks):
if not len(blocks):
return None

counts = defaultdict(list)
for x in blocks:
counts[type(x)].append(x)

have_int = len(counts[IntBlock]) > 0
have_bool = len(counts[BoolBlock]) > 0
have_object = len(counts[ObjectBlock]) > 0
have_float = len(counts[FloatBlock]) > 0
have_complex = len(counts[ComplexBlock]) > 0
have_dt64 = len(counts[DatetimeBlock]) > 0
have_dt64_tz = len(counts[DatetimeTZBlock]) > 0
have_td64 = len(counts[TimeDeltaBlock]) > 0
have_cat = len(counts[CategoricalBlock]) > 0
# TODO: have_sparse is not used
have_sparse = len(counts[SparseBlock]) > 0 # noqa
have_numeric = have_float or have_complex or have_int
has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat

if (have_object or
(have_bool and
(have_numeric or have_dt64 or have_dt64_tz or have_td64)) or
(have_numeric and has_non_numeric) or have_cat or have_dt64 or
have_dt64_tz or have_td64):
return np.dtype(object)
elif have_bool:
return np.dtype(bool)
elif have_int and not have_float and not have_complex:
# if we are mixing unsigned and signed, then return
# the next biggest int type (if we can)
lcd = _find_common_type([b.dtype for b in counts[IntBlock]])
kinds = set([i.dtype.kind for i in counts[IntBlock]])
if len(kinds) == 1:
return lcd

if lcd == 'uint64' or lcd == 'int64':
return np.dtype('int64')

# return 1 bigger on the itemsize if unsigned
if lcd.kind == 'u':
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
return lcd

elif have_int and have_float and not have_complex:
return np.dtype('float64')
elif have_complex:
return np.dtype('c16')
else:
introspection_blks = counts[FloatBlock] + counts[SparseBlock]
return _find_common_type([b.dtype for b in introspection_blks])
dtype = _find_common_type([b.dtype for b in blocks])

# only numpy compat
if isinstance(dtype, ExtensionDtype):
dtype = np.object

return dtype


def _consolidate(blocks):
Expand Down
14 changes: 2 additions & 12 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1183,19 +1183,9 @@ def _assert_replace_conversion(self, from_key, to_key, how):

result = obj.replace(replacer)

# buggy on windows for bool/int64
if (from_key == 'bool' and
to_key == 'int64' and
tm.is_platform_windows()):
pytest.skip("windows platform buggy: {0} -> {1}".format
(from_key, to_key))

if ((from_key == 'float64' and to_key in ('bool', 'int64')) or
if ((from_key == 'float64' and to_key in ('int64')) or
(from_key == 'complex128' and
to_key in ('bool', 'int64', 'float64')) or

# GH12747 The result must be int?
(from_key == 'int64' and to_key in ('bool'))):
to_key in ('int64', 'float64'))):

# buggy on 32-bit
if tm.is_platform_32bit():
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ def check_replace(to_rep, val, expected):
tr, v = [3, 4], [3.5, pd.Timestamp('20130101')]
check_replace(tr, v, e)

# casts to float
e = pd.Series([0, 1, 2, 3.5, 1])
# casts to object
e = pd.Series([0, 1, 2, 3.5, True], dtype='object')
tr, v = [3, 4], [3.5, True]
check_replace(tr, v, e)

Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/types/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,20 @@ def test_numpy_dtypes(self):
((np.object, np.float32), np.object),
((np.object, np.int16), np.object),

# bool with int
((np.dtype('bool'), np.int64), np.object),
((np.dtype('bool'), np.int32), np.object),
((np.dtype('bool'), np.int16), np.object),
((np.dtype('bool'), np.int8), np.object),
((np.dtype('bool'), np.uint64), np.object),
((np.dtype('bool'), np.uint32), np.object),
((np.dtype('bool'), np.uint16), np.object),
((np.dtype('bool'), np.uint8), np.object),

# bool with float
((np.dtype('bool'), np.float64), np.object),
((np.dtype('bool'), np.float32), np.object),

((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
np.dtype('datetime64[ns]')),
((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),
Expand Down
28 changes: 27 additions & 1 deletion pandas/types/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,12 +892,28 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):


def _find_common_type(types):
"""Find a common data type among the given dtypes."""
"""
Find a common data type among the given dtypes.

Parameters
----------
types : list of dtypes

Returns
-------
pandas extension or numpy dtype

See Also
--------
numpy.find_common_type
"""

if len(types) == 0:
raise ValueError('no types given')

first = types[0]

# workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
# => object
if all(is_dtype_equal(first, t) for t in types[1:]):
Expand All @@ -912,4 +928,14 @@ def _find_common_type(types):
if all(is_timedelta64_dtype(t) for t in types):
return np.dtype('timedelta64[ns]')

# don't mix bool / int or float or complex
# this is different from numpy, which upcasts bool to the int/float dtype
has_bools = any(is_bool_dtype(t) for t in types)
if has_bools:
has_ints = any(is_integer_dtype(t) for t in types)
has_floats = any(is_float_dtype(t) for t in types)
has_complex = any(is_complex_dtype(t) for t in types)
if has_ints or has_floats or has_complex:
return np.object

return np.find_common_type(types, [])

0 comments on commit fb72afc

Please sign in to comment.