Skip to content

Commit

Permalink
Merge pull request #3204 from Zac-HD/pandas-dtypes
Browse files: browse the repository at this point in the history
Improve error messages for Pandas when `dtype=object` is omitted
  • Loading branch information
Zac-HD committed Dec 31, 2021
2 parents 9a304dd + 9345863 commit e024d1f
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 1 deletion.
5 changes: 5 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
RELEASE_TYPE: minor

This release fixes :issue:`3133` and :issue:`3144`, where attempting
to generate Pandas series of lists or sets would fail with confusing
errors if you did not specify ``dtype=object``.
24 changes: 23 additions & 1 deletion hypothesis-python/src/hypothesis/extra/pandas/impl.py
Expand Up @@ -10,13 +10,15 @@

from collections import OrderedDict, abc
from copy import copy
from datetime import datetime, timedelta
from typing import Any, List, Optional, Sequence, Set, Union

import attr
import numpy as np
import pandas

from hypothesis import strategies as st
from hypothesis._settings import note_deprecation
from hypothesis.control import reject
from hypothesis.errors import InvalidArgument
from hypothesis.extra import numpy as npst
Expand Down Expand Up @@ -78,6 +80,14 @@ def elements_and_dtype(elements, dtype, source=None):
f"{prefix}dtype is categorical, which is currently unsupported"
)

if isinstance(dtype, type) and np.dtype(dtype).kind == "O" and dtype is not object:
note_deprecation(
f"Passed dtype={dtype!r} is not a valid Pandas dtype. We'll treat it as "
"dtype=object for now, but this will be an error in a future version.",
since="RELEASEDAY",
has_codemod=False,
)

dtype = try_convert(np.dtype, dtype, "dtype")

if elements is None:
Expand Down Expand Up @@ -577,7 +587,19 @@ def just_draw_columns(draw):
reject()
else:
value = draw(c.elements)
data[c.name][i] = value
try:
data[c.name][i] = value
except ValueError as err:
if c.dtype is None and not isinstance(
value, (float, int, str, bool, datetime, timedelta)
):
raise ValueError(
f"Failed to add value={value!r} to column "
f"{c.name} with dtype=None. Maybe passing "
"dtype=object would help?"
) from err
# Unclear how this could happen, but users find a way...
raise # pragma: no cover

for c in rewritten_columns:
if not c.fill.is_empty:
Expand Down
6 changes: 6 additions & 0 deletions hypothesis-python/tests/pandas/test_argument_validation.py
Expand Up @@ -16,6 +16,7 @@
from hypothesis.extra import pandas as pdst

from tests.common.arguments import argument_validation_test, e
from tests.common.utils import checks_deprecated_behaviour

BAD_ARGS = [
e(pdst.data_frames),
Expand Down Expand Up @@ -91,3 +92,8 @@ def test_timestamp_as_datetime_bounds(dt):
assert isinstance(dt, datetime)
assert lo <= dt <= hi
assert not isinstance(dt, pd.Timestamp)


@checks_deprecated_behaviour
def test_confusing_object_dtype_aliases():
    """Draw a series of tuples while passing ``dtype=tuple``.

    ``tuple`` is a Python type rather than ``object`` itself; the decorator
    marks this test as exercising deprecated behaviour.
    """
    tuple_elements = st.tuples(st.integers())
    series_strategy = pdst.series(elements=tuple_elements, dtype=tuple)
    series_strategy.example()
18 changes: 18 additions & 0 deletions hypothesis-python/tests/pandas/test_data_frame.py
Expand Up @@ -9,6 +9,7 @@
# obtain one at https://mozilla.org/MPL/2.0/.

import numpy as np
import pytest

from hypothesis import HealthCheck, given, reject, settings, strategies as st
from hypothesis.extra import numpy as npst, pandas as pdst
Expand Down Expand Up @@ -238,3 +239,20 @@ def test_will_fill_missing_columns_in_tuple_row(df):
)
def test_can_generate_unique_columns(df):
assert set(df[0]) == set(range(10))


@pytest.mark.parametrize("dtype", [None, object])
def test_expected_failure_from_omitted_object_dtype(dtype):
    """Check data frames whose single column holds non-empty sets of text.

    With ``dtype=object`` the inner test must run cleanly; with ``dtype=None``
    it must fail with a ``ValueError`` whose message suggests passing
    ``dtype=object``.
    """
    # See https://github.com/HypothesisWorks/hypothesis/issues/3133
    set_elements = st.sets(st.text(), min_size=1)
    set_column = pdst.column(elements=set_elements, dtype=dtype)
    frames = pdst.data_frames(columns=[set_column])

    @given(frames)
    def works_with_object_dtype(df):
        pass

    if dtype is not object:
        # The only other parametrized case is the omitted dtype.
        assert dtype is None
        hint = "Maybe passing dtype=object would help"
        with pytest.raises(ValueError, match=hint):
            works_with_object_dtype()
        return

    works_with_object_dtype()

0 comments on commit e024d1f

Please sign in to comment.