Skip to content

Commit

Permalink
DEPR: Deprecate usecols as int in read_excel (pandas-dev#23635)
Browse files Browse the repository at this point in the history
Follow-up to pandas-dev#23544.
  • Loading branch information
gfyoung authored and JustinZhengBC committed Nov 14, 2018
1 parent 999ef43 commit 7fc3732
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 11 deletions.
5 changes: 5 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2854,6 +2854,11 @@ It is often the case that users will insert columns to do temporary computations
in Excel and you may not want to read in those columns. ``read_excel`` takes
a ``usecols`` keyword to allow you to specify a subset of columns to parse.

.. deprecated:: 0.24.0

Passing in an integer for ``usecols`` has been deprecated. Please pass in a list
of ints from 0 to ``usecols`` inclusive instead.

If ``usecols`` is an integer, then it is assumed to indicate the last column
to be parsed.

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,7 @@ Deprecations
- The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
- Deprecated the ``nthreads`` keyword of :func:`pandas.read_feather` in favor of
``use_threads`` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`)
- Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`)

.. _whatsnew_0240.deprecations.datetimelike_int_ops:
Expand Down
8 changes: 8 additions & 0 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@
usecols : int, str, list-like, or callable, default None
* If None, then parse all columns,
* If int, then indicates last column to be parsed
.. deprecated:: 0.24.0
Pass in a list of ints from 0 to `usecols` inclusive instead.
* If string, then indicates comma separated list of Excel column letters
and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
both sides.
Expand Down Expand Up @@ -778,6 +782,10 @@ def _maybe_convert_usecols(usecols):
return usecols

if is_integer(usecols):
warnings.warn(("Passing in an integer for `usecols` has been "
"deprecated. Please pass in a list of ints from "
"0 to `usecols` inclusive instead."),
FutureWarning, stacklevel=2)
return lrange(usecols + 1)

if isinstance(usecols, compat.string_types):
Expand Down
33 changes: 22 additions & 11 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,23 +105,34 @@ def get_exceldf(self, basename, ext, *args, **kwds):
class ReadingTestsBase(SharedItems):
# This is based on ExcelWriterBase

@td.skip_if_no('xlrd', '1.0.1') # GH-22682
@td.skip_if_no("xlrd", "1.0.1") # see gh-22682
def test_usecols_int(self, ext):

dfref = self.get_csv_refdf('test1')
dfref = dfref.reindex(columns=['A', 'B', 'C'])
df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3)
df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
index_col=0, usecols=3)
df_ref = self.get_csv_refdf("test1")
df_ref = df_ref.reindex(columns=["A", "B", "C"])

with tm.assert_produces_warning(FutureWarning):
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
# usecols as int
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
df1 = self.get_exceldf("test1", ext, "Sheet1",
index_col=0, usecols=3)

# usecols as int
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1],
index_col=0, usecols=3)

# parse_cols instead of usecols, usecols as int
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1],
index_col=0, parse_cols=3)

# TODO add index to xls file
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)
tm.assert_frame_equal(df3, dfref, check_names=False)
tm.assert_frame_equal(df1, df_ref, check_names=False)
tm.assert_frame_equal(df2, df_ref, check_names=False)
tm.assert_frame_equal(df3, df_ref, check_names=False)

@td.skip_if_no('xlrd', '1.0.1') # GH-22682
def test_usecols_list(self, ext):
Expand Down

0 comments on commit 7fc3732

Please sign in to comment.