Skip to content

Commit

Permalink
DEPR: ignoring dtype in DataFrame constructor failures (pandas-dev#41733)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and TLouf committed Jun 1, 2021
1 parent bbacb1c commit bf5560c
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 15 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ Other enhancements
- Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
- Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
- :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
- Improved error message in ``corr` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -686,6 +686,7 @@ Deprecations
- Deprecated passing arguments (apart from ``cond`` and ``other``) as positional in :meth:`DataFrame.mask` and :meth:`Series.mask` (:issue:`41485`)
- Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`)
- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`)
- Deprecated behavior of the :class:`DataFrame` constructor when a ``dtype`` is passed and the data cannot be cast to that dtype. In a future version, a failed cast will raise instead of the ``dtype`` being silently ignored (:issue:`24435`)
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
- Deprecated passing arguments as positional in :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, and :meth:`Series.bfill` (:issue:`41485`)
- Deprecated passing arguments as positional in :meth:`DataFrame.sort_values` (other than ``"by"``) and :meth:`Series.sort_values` (:issue:`41485`)
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Sequence,
cast,
)
import warnings

import numpy as np
import numpy.ma as ma
Expand Down Expand Up @@ -745,6 +746,17 @@ def _try_cast(
if raise_cast_failure:
raise
else:
# we only get here with raise_cast_failure False, which means
# called via the DataFrame constructor
# GH#24435
warnings.warn(
f"Could not cast to {dtype}, falling back to object. This "
"behavior is deprecated. In a future version, when a dtype is "
"passed to 'DataFrame', either all columns will be cast to that "
"dtype, or a TypeError will be raised",
FutureWarning,
stacklevel=7,
)
subarr = np.array(arr, dtype=object, copy=copy)
return subarr

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,11 @@ def f(dtype):
f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])

# these work (though results may be unexpected)
f("int64")
f("float64")
depr_msg = "either all columns will be cast to that dtype, or a TypeError will"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
f("int64")
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
f("float64")

# 10822
# invalid error message on dt inference
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ def test_constructor_mixed(self, float_string_frame):
assert float_string_frame["foo"].dtype == np.object_

def test_constructor_cast_failure(self):
foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)
msg = "either all columns will be cast to that dtype, or a TypeError will"
with tm.assert_produces_warning(FutureWarning, match=msg):
foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)
assert foo["a"].dtype == object

# GH 3010, constructing with odd arrays
Expand Down Expand Up @@ -683,7 +685,10 @@ def test_constructor_dict_cast2(self):
"A": dict(zip(range(20), tm.makeStringIndex(20))),
"B": dict(zip(range(15), np.random.randn(15))),
}
frame = DataFrame(test_data, dtype=float)
msg = "either all columns will be cast to that dtype, or a TypeError will"
with tm.assert_produces_warning(FutureWarning, match=msg):
frame = DataFrame(test_data, dtype=float)

assert len(frame) == 20
assert frame["A"].dtype == np.object_
assert frame["B"].dtype == np.float64
Expand Down
15 changes: 7 additions & 8 deletions pandas/tests/indexing/multiindex/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,27 +206,26 @@ def test_frame_getitem_nan_multiindex(nulls_fixture):
df = DataFrame(
[[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]],
columns=cols,
dtype="int64",
).set_index(["a", "b"])
df["c"] = df["c"].astype("int64")

idx = (21, n)
result = df.loc[:idx]
expected = DataFrame(
[[11, n, 13], [21, n, 23]], columns=cols, dtype="int64"
).set_index(["a", "b"])
expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"])
expected["c"] = expected["c"].astype("int64")
tm.assert_frame_equal(result, expected)

result = df.loc[idx:]
expected = DataFrame(
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols, dtype="int64"
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols
).set_index(["a", "b"])
expected["c"] = expected["c"].astype("int64")
tm.assert_frame_equal(result, expected)

idx1, idx2 = (21, n), (31, n)
result = df.loc[idx1:idx2]
expected = DataFrame(
[[21, n, 23], [31, n, 33]], columns=cols, dtype="int64"
).set_index(["a", "b"])
expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"])
expected["c"] = expected["c"].astype("int64")
tm.assert_frame_equal(result, expected)


Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,12 @@ def test_loc_setitem_dtype(self):
df.loc[:, cols] = df.loc[:, cols].astype("float32")

expected = DataFrame(
{"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}, dtype="float32"
{
"id": ["A"],
"a": np.array([1.2], dtype="float32"),
"b": np.array([0.0], dtype="float32"),
"c": np.array([-2.5], dtype="float32"),
}
) # id is inferred as object

tm.assert_frame_equal(df, expected)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/reshape/test_get_dummies.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,9 @@ def test_dataframe_dummies_subset(self, df, sparse):
"from_A_a": [1, 0, 1],
"from_A_b": [0, 1, 0],
},
dtype=np.uint8,
)
cols = expected.columns
expected[cols[1:]] = expected[cols[1:]].astype(np.uint8)
expected[["C"]] = df[["C"]]
if sparse:
cols = ["from_A_a", "from_A_b"]
Expand Down

0 comments on commit bf5560c

Please sign in to comment.