diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 9271f58947f953..cae0d1a754d891 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -61,6 +61,7 @@ Bug Fixes - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) +- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`) - Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`). - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1780e359164e2f..9902da40944042 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): """ axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) - axis, axis_ = self._get_axis(axis), axis + axis = self._get_axis(axis) if axis.is_unique: if level is not None: @@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): new_axis = axis.drop(labels, level=level, errors=errors) else: new_axis = axis.drop(labels, errors=errors) - dropped = self.reindex(**{axis_name: new_axis}) - try: - dropped.axes[axis_].set_names(axis.names, inplace=True) - except AttributeError: - pass - result = dropped + result = self.reindex(**{axis_name: new_axis}) + # Case for non-unique axis else: labels = _ensure_object(com._index_labels_to_array(labels)) if level is not None: if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') indexer = ~axis.get_level_values(level).isin(labels) + + # GH 18561 MultiIndex.drop should raise if label is absent + if errors == 'raise' and indexer.all(): + raise KeyError('{} not found in axis'.format(labels)) else: indexer = ~axis.isin(labels) - - if errors == 'raise' and indexer.all(): - raise KeyError('{} not found in axis'.format(labels)) + # Check if label doesn't exist along axis + labels_missing = (axis.get_indexer_for(labels) == -1).any() + if errors == 'raise' and labels_missing: + raise KeyError('{} not found in axis'.format(labels)) slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ac33ffad762cd5..4f140a6e77b2fc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4341,7 +4341,7 @@ def drop(self, labels, errors='raise'): Raises ------ KeyError - If none of the labels are found in the selected axis + If not all of the labels are found in the selected axis """ arr_dtype = 'object' if self.dtype == 'object' else None labels = com._index_labels_to_array(labels, dtype=arr_dtype) @@ -4350,7 +4350,7 @@ def drop(self, labels, errors='raise'): if mask.any(): if errors != 'ignore': raise KeyError( - 'labels %s not contained in axis' % labels[mask]) + '{} not found in axis'.format(labels[mask])) indexer = indexer[~mask] return self.delete(indexer) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ab23a80acdaaea..61b50f139dd100 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'): if errors != 'ignore': raise ValueError('labels %s not contained in axis' % labels[mask]) - indexer = indexer[~mask] except Exception: pass diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 28e82f75858508..0e0d6598f51013 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual): expected_no_err = actual.T.drop('c', axis=1, level=level, errors='ignore') assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize('drop_labels', [[], [1], [2]]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + frame = pd.DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index bcd5a64402c336..561d6a9b425080 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -472,54 +472,86 @@ def test_rename(): assert result.name == expected.name -def test_drop(): - # unique - s = Series([1, 2], index=['one', 'two']) - expected = Series([1], index=['one']) - result = s.drop(['two']) - assert_series_equal(result, expected) - result = s.drop('two', axis='rows') - assert_series_equal(result, expected) - - # non-unique - # GH 5248 - s = Series([1, 1, 2], index=['one', 'two', 'one']) - expected = Series([1, 2], index=['one', 'one']) - result = s.drop(['two'], axis=0) - assert_series_equal(result, expected) - result = s.drop('two') - assert_series_equal(result, expected) - - expected = Series([1], index=['two']) - result = s.drop(['one']) - assert_series_equal(result, expected) - result = s.drop('one') - assert_series_equal(result, expected) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, expected_data, expected_index', + [ + # Unique Index + ([1, 2], ['one', 'two'], ['two'], + 0, [1], ['one']), + ([1, 2], ['one', 'two'], ['two'], + 'rows', [1], ['one']), + ([1, 1, 2], ['one', 'two', 'one'], ['two'], + 0, [1, 2], ['one', 'one']), + + # GH 5248 Non-Unique Index + ([1, 1, 2], ['one', 'two', 'one'], 'two', + 0, [1, 2], ['one', 'one']), + ([1, 1, 2], ['one', 'two', 'one'], ['one'], + 0, [1], ['two']), + ([1, 1, 2], ['one', 'two', 'one'], 'one', + 0, [1], ['two'])]) +def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, + expected_data, expected_index): + + s = Series(data=data, index=index) + result = s.drop(drop_labels, axis=axis) + expected = Series(data=expected_data, index=expected_index) + tm.assert_series_equal(result, expected) - # single string/tuple-like - s = Series(range(3), index=list('abc')) - pytest.raises(KeyError, s.drop, 'bc') - pytest.raises(KeyError, s.drop, ('a',)) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, error_type, error_desc', + [ + # single string/tuple-like + (range(3), list('abc'), 'bc', + 0, KeyError, 'not found in axis'), + + # bad axis + (range(3), list('abc'), ('a',), + 0, KeyError, 'not found in axis'), + (range(3), list('abc'), 'one', + 'columns', ValueError, 'No axis named columns')]) +def test_drop_exception_raised(data, index, drop_labels, + axis, error_type, error_desc): + + with tm.assert_raises_regex(error_type, error_desc): + Series(data, index=index).drop(drop_labels, axis=axis) + + +def test_drop_with_ignore_errors(): # errors='ignore' s = Series(range(3), index=list('abc')) result = s.drop('bc', errors='ignore') - assert_series_equal(result, s) + tm.assert_series_equal(result, s) result = s.drop(['a', 'd'], errors='ignore') expected = s.iloc[1:] - assert_series_equal(result, expected) - - # bad axis - pytest.raises(ValueError, s.drop, 'one', axis='columns') + tm.assert_series_equal(result, expected) # GH 8522 s = Series([2, 3], index=[True, False]) assert s.index.is_object() result = s.drop(True) expected = Series([3], index=[False]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + - # GH 16877 - s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(KeyError, 'not contained in axis'): - s.drop([False, True]) +@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]]) +@pytest.mark.parametrize('drop_labels', [[], [1], [3]]) +def test_drop_empty_list(index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = pd.Series(index=index).drop(drop_labels) + tm.assert_series_equal(series, pd.Series(index=expected_index)) + + +@pytest.mark.parametrize('data, index, drop_labels', [ + (None, [1, 2, 3], [1, 4]), + (None, [1, 2, 2], [1, 4]), + ([2, 3], [0, 1], [False, True]) +]) +def test_drop_non_empty_list(data, index, drop_labels): + # GH 21494 and GH 16877 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.Series(data=data, index=index).drop(drop_labels)