From 787dc8aa90813bb8509ffda84cd2f828d772c7fb Mon Sep 17 00:00:00 2001 From: Anna Daglis <40292327+AnnaDaglis@users.noreply.github.com> Date: Mon, 9 Mar 2020 15:42:16 +0000 Subject: [PATCH] BUG: Fixed bug, where pandas._libs.lib.maybe_convert_objects function improperly handled arrays with bools and NaNs (#32242) * BUG: Fixed bug, where pandas._libs.lib.maybe_convert_objects function improperly handled arrays with bools and NaNs --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/lib.pyx | 2 +- pandas/tests/dtypes/test_inference.py | 7 +++++ pandas/tests/indexes/test_base.py | 11 ++++++++ .../tests/series/methods/test_value_counts.py | 26 +++++++++++++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 808e6ae709ce9..eec471f989037 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -24,6 +24,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) +- Fixed bug in the repr of an object-dtype ``Index`` with bools and missing values (:issue:`32146`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index feccb447c5112..db1abae4d6ff3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2296,7 +2296,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return uints else: return ints - elif seen.is_bool: + elif seen.is_bool and not seen.nan_: return bools.view(np.bool_) return objects diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b01747ef010c1..e0fef833d4ced 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -575,6 +575,13 @@ def test_maybe_convert_objects_nullable_integer(self, exp): tm.assert_extension_array_equal(result, exp) + def test_maybe_convert_objects_bool_nan(self): + # GH32146 + ind = pd.Index([True, False, np.nan], dtype=object) + exp = np.array([True, False, np.nan], dtype=object) + out = lib.maybe_convert_objects(ind.values, safe=1) + tm.assert_numpy_array_equal(out, exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 73af98c16afae..9e0cef375fea3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2458,6 +2458,17 @@ def test_intersect_str_dates(self): expected = Index([], dtype=object) tm.assert_index_equal(result, expected) + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 + class TestIndexUtils: @pytest.mark.parametrize( diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index fdb35befeb0c2..f97362ce9c2a9 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas import Categorical, CategoricalIndex, Series @@ -177,3 +178,28 @@ def test_value_counts_categorical_with_nan(self): exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) res = ser.value_counts(dropna=False, sort=False) tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize( + "ser, dropna, exp", + [ + ( + pd.Series([False, True, True, pd.NA]), + False, + pd.Series([2, 1, 1], index=[True, False, pd.NA]), + ), + ( + pd.Series([False, True, True, pd.NA]), + True, + pd.Series([2, 1], index=[True, False]), + ), + ( + pd.Series(range(3), index=[True, False, np.nan]).index, + False, + pd.Series([1, 1, 1], index=[True, False, pd.NA]), + ), + ], + ) + def test_value_counts_bool_with_nan(self, ser, dropna, exp): + # GH32146 + out = ser.value_counts(dropna=dropna) + tm.assert_series_equal(out, exp)