Skip to content

Commit

Permalink
Deprecated usecols with out of bounds indices in read_csv (pandas-dev…
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored and JulianWgs committed Jul 3, 2021
1 parent 216d5cd commit c54643b
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Expand Up @@ -645,6 +645,7 @@ Deprecations
- The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`)
- Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`)
- Deprecated setting :attr:`Categorical._codes`; create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
- Deprecated using ``usecols`` with out-of-bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)

.. ---------------------------------------------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/parsers.pyx
Expand Up @@ -947,6 +947,17 @@ cdef class TextReader:
f"{self.table_width - self.leading_cols} "
f"and found {num_cols}")

if (self.usecols is not None and not callable(self.usecols) and
all(isinstance(u, int) for u in self.usecols)):
missing_usecols = [col for col in self.usecols if col >= num_cols]
if missing_usecols:
warnings.warn(
"Defining usecols with out of bounds indices is deprecated "
"and will raise a ParserError in a future version.",
FutureWarning,
stacklevel=6,
)

results = {}
nused = 0
for i in range(self.table_width):
Expand Down
32 changes: 26 additions & 6 deletions pandas/io/parsers/python_parser.py
Expand Up @@ -15,6 +15,7 @@
Tuple,
cast,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -477,7 +478,7 @@ def _infer_columns(self):
if self.usecols is not None:
# Set _use_cols. We don't store columns because they are
# overwritten.
self._handle_usecols(columns, names)
self._handle_usecols(columns, names, num_original_columns)
else:
num_original_columns = len(names)
if self._col_indices is not None and len(names) != len(
Expand All @@ -487,7 +488,9 @@ def _infer_columns(self):
else:
columns = [names]
else:
columns = self._handle_usecols(columns, columns[0])
columns = self._handle_usecols(
columns, columns[0], num_original_columns
)
else:
try:
line = self._buffered_line()
Expand All @@ -506,10 +509,12 @@ def _infer_columns(self):
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
else:
columns = [list(range(ncols))]
columns = self._handle_usecols(columns, columns[0])
columns = self._handle_usecols(
columns, columns[0], num_original_columns
)
else:
if self.usecols is None or len(names) >= num_original_columns:
columns = self._handle_usecols([names], names)
columns = self._handle_usecols([names], names, num_original_columns)
num_original_columns = len(names)
else:
if not callable(self.usecols) and len(names) != len(self.usecols):
Expand All @@ -518,13 +523,18 @@ def _infer_columns(self):
"header fields in the file"
)
# Ignore output but set used columns.
self._handle_usecols([names], names)
self._handle_usecols([names], names, ncols)
columns = [names]
num_original_columns = ncols

return columns, num_original_columns, unnamed_cols

def _handle_usecols(self, columns, usecols_key):
def _handle_usecols(
self,
columns: List[List[Union[Optional[str], Optional[int]]]],
usecols_key: List[Union[Optional[str], Optional[int]]],
num_original_columns: int,
):
"""
Sets self._col_indices
Expand All @@ -549,6 +559,16 @@ def _handle_usecols(self, columns, usecols_key):
else:
col_indices.append(col)
else:
missing_usecols = [
col for col in self.usecols if col >= num_original_columns
]
if missing_usecols:
warnings.warn(
"Defining usecols with out of bounds indices is deprecated "
"and will raise a ParserError in a future version.",
FutureWarning,
stacklevel=8,
)
col_indices = self.usecols

columns = [
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/parser/usecols/test_usecols_basic.py
Expand Up @@ -383,7 +383,8 @@ def test_usecols_indices_out_of_bounds(all_parsers, names):
a,b
1,2
"""
result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
expected = DataFrame({"a": [1], "b": [None]})
if names is None and parser.engine == "python":
expected = DataFrame({"a": [1]})
Expand Down

0 comments on commit c54643b

Please sign in to comment.