diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d036049e3ffdb..e0d15c218ec85 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -826,7 +826,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`) +- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 60684a929889b..6487c2108028e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -9,7 +9,8 @@ from pandas.core.base import PandasObject -from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype, + CategoricalDtype) from pandas.types.common import (_TD_DTYPE, _NS_DTYPE, _ensure_int64, _ensure_platform_int, is_integer, @@ -4496,55 +4497,13 @@ def _interleaved_dtype(blocks): if not len(blocks): return None - counts = defaultdict(list) - for x in blocks: - counts[type(x)].append(x) - - have_int = len(counts[IntBlock]) > 0 - have_bool = len(counts[BoolBlock]) > 0 - have_object = len(counts[ObjectBlock]) > 0 - have_float = len(counts[FloatBlock]) > 0 - have_complex = len(counts[ComplexBlock]) > 0 - have_dt64 = len(counts[DatetimeBlock]) > 0 - have_dt64_tz = len(counts[DatetimeTZBlock]) > 0 - have_td64 = len(counts[TimeDeltaBlock]) > 0 - have_cat = len(counts[CategoricalBlock]) > 0 - # TODO: have_sparse is not used - have_sparse = len(counts[SparseBlock]) > 0 # noqa - have_numeric = have_float or have_complex or have_int - has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat - - if (have_object or - (have_bool and - (have_numeric or have_dt64 or have_dt64_tz or have_td64)) or - (have_numeric and has_non_numeric) or have_cat or have_dt64 or - have_dt64_tz or have_td64): - return np.dtype(object) - elif have_bool: - return np.dtype(bool) - elif have_int and not have_float and not have_complex: - # if we are mixing unsigned and signed, then return - # the next biggest int type (if we can) - lcd = _find_common_type([b.dtype for b in counts[IntBlock]]) - kinds = set([i.dtype.kind for i in counts[IntBlock]]) - if len(kinds) == 1: - return lcd - - if lcd == 'uint64' or lcd == 'int64': - return np.dtype('int64') - - # return 1 bigger on the itemsize if unsinged - if lcd.kind == 'u': - return np.dtype('int%s' % (lcd.itemsize * 8 * 2)) - return lcd - - elif have_int and have_float and not have_complex: - return np.dtype('float64') - elif have_complex: - return np.dtype('c16') - else: - introspection_blks = counts[FloatBlock] + counts[SparseBlock] - return _find_common_type([b.dtype for b in introspection_blks]) + dtype = _find_common_type([b.dtype for b in blocks]) + + # only numpy compat + if isinstance(dtype, ExtensionDtype): + dtype = np.object + + return dtype def _consolidate(blocks): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index df95f563c0832..7216c05657102 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1183,19 +1183,9 @@ def _assert_replace_conversion(self, from_key, to_key, how): result = obj.replace(replacer) - # buggy on windows for bool/int64 - if (from_key == 'bool' and - to_key == 'int64' and - tm.is_platform_windows()): - pytest.skip("windows platform buggy: {0} -> {1}".format - (from_key, to_key)) - - if ((from_key == 'float64' and to_key in ('bool', 'int64')) or + if ((from_key == 'float64' and to_key in ('int64')) or (from_key == 'complex128' and - to_key in ('bool', 'int64', 'float64')) or - - # GH12747 The result must be int? - (from_key == 'int64' and to_key in ('bool'))): + to_key in ('int64', 'float64'))): # buggy on 32-bit if tm.is_platform_32bit(): diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index f5a25e93cc82d..0a53581e24ba5 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -152,8 +152,8 @@ def check_replace(to_rep, val, expected): tr, v = [3, 4], [3.5, pd.Timestamp('20130101')] check_replace(tr, v, e) - # casts to float - e = pd.Series([0, 1, 2, 3.5, 1]) + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype='object') tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 70f69cc7d5701..d7b086daea1e3 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -238,6 +238,20 @@ def test_numpy_dtypes(self): ((np.object, np.float32), np.object), ((np.object, np.int16), np.object), + # bool with int + ((np.dtype('bool'), np.int64), np.object), + ((np.dtype('bool'), np.int32), np.object), + ((np.dtype('bool'), np.int16), np.object), + ((np.dtype('bool'), np.int8), np.object), + ((np.dtype('bool'), np.uint64), np.object), + ((np.dtype('bool'), np.uint32), np.object), + ((np.dtype('bool'), np.uint16), np.object), + ((np.dtype('bool'), np.uint8), np.object), + + # bool with float + ((np.dtype('bool'), np.float64), np.object), + ((np.dtype('bool'), np.float32), np.object), + ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), np.dtype('datetime64[ns]')), ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 11a837dd21159..0e26cd085db5a 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -892,12 +892,28 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): def _find_common_type(types): - """Find a common data type among the given dtypes.""" + """ + Find a common data type among the given dtypes. + + Parameters + ---------- + types : list of dtypes + + Returns + ------- + pandas extension or numpy dtype + + See Also + -------- + numpy.find_common_type + + """ if len(types) == 0: raise ValueError('no types given') first = types[0] + # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2) # => object if all(is_dtype_equal(first, t) for t in types[1:]): @@ -912,4 +928,14 @@ def _find_common_type(types): if all(is_timedelta64_dtype(t) for t in types): return np.dtype('timedelta64[ns]') + # don't mix bool / int or float or complex + # this is different from numpy, which casts bool with float/int as int + has_bools = any(is_bool_dtype(t) for t in types) + if has_bools: + has_ints = any(is_integer_dtype(t) for t in types) + has_floats = any(is_float_dtype(t) for t in types) + has_complex = any(is_complex_dtype(t) for t in types) + if has_ints or has_floats or has_complex: + return np.object + return np.find_common_type(types, [])