DEPR: Deprecate usecols as int in read_excel (pandas-dev#23635)

Follow-up to pandas-devgh-23544.
JustinZhengBC · Nov 14, 2018 · 7fc3732 · 7fc3732
1 parent 999ef43
commit 7fc3732
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 11 deletions.
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -2854,6 +2854,11 @@ It is often the case that users will insert columns to do temporary computations
 in Excel and you may not want to read in those columns. ``read_excel`` takes
 a ``usecols`` keyword to allow you to specify a subset of columns to parse.
 
+.. deprecated:: 0.24.0
+
+Passing in an integer for ``usecols`` has been deprecated. Please pass in a list
+of ints from 0 to ``usecols`` inclusive instead.
+
 If ``usecols`` is an integer, then it is assumed to indicate the last column
 to be parsed.
 

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -970,6 +970,7 @@ Deprecations
 - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
 - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of
   `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
+- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`)
 - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`)
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:

diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -95,6 +95,10 @@
 usecols : int, str, list-like, or callable default None
     * If None, then parse all columns,
     * If int, then indicates last column to be parsed
+
+    .. deprecated:: 0.24.0
+       Pass in a list of ints instead from 0 to `usecols` inclusive.
+
     * If string, then indicates comma separated list of Excel column letters
       and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
       both sides.
@@ -778,6 +782,10 @@ def _maybe_convert_usecols(usecols):
         return usecols
 
     if is_integer(usecols):
+        warnings.warn(("Passing in an integer for `usecols` has been "
+                       "deprecated. Please pass in a list of ints from "
+                       "0 to `usecols` inclusive instead."),
+                      FutureWarning, stacklevel=2)
         return lrange(usecols + 1)
 
     if isinstance(usecols, compat.string_types):

diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
@@ -105,23 +105,34 @@ def get_exceldf(self, basename, ext, *args, **kwds):
 class ReadingTestsBase(SharedItems):
     # This is based on ExcelWriterBase
 
-    @td.skip_if_no('xlrd', '1.0.1')  # GH-22682
+    @td.skip_if_no("xlrd", "1.0.1")  # see gh-22682
     def test_usecols_int(self, ext):
 
-        dfref = self.get_csv_refdf('test1')
-        dfref = dfref.reindex(columns=['A', 'B', 'C'])
-        df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3)
-        df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
-                               index_col=0, usecols=3)
+        df_ref = self.get_csv_refdf("test1")
+        df_ref = df_ref.reindex(columns=["A", "B", "C"])
 
-        with tm.assert_produces_warning(FutureWarning):
-            df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
+        # usecols as int
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df1 = self.get_exceldf("test1", ext, "Sheet1",
+                                   index_col=0, usecols=3)
+
+        # usecols as int
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1],
+                                   index_col=0, usecols=3)
+
+        # parse_cols instead of usecols, usecols as int
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1],
                                    index_col=0, parse_cols=3)
 
         # TODO add index to xls file)
-        tm.assert_frame_equal(df1, dfref, check_names=False)
-        tm.assert_frame_equal(df2, dfref, check_names=False)
-        tm.assert_frame_equal(df3, dfref, check_names=False)
+        tm.assert_frame_equal(df1, df_ref, check_names=False)
+        tm.assert_frame_equal(df2, df_ref, check_names=False)
+        tm.assert_frame_equal(df3, df_ref, check_names=False)
 
     @td.skip_if_no('xlrd', '1.0.1')  # GH-22682
     def test_usecols_list(self, ext):