New method: @example(...).xfail() #3537

Merged · 5 commits · Jan 8, 2023 · Changes from 4 commits
17 changes: 17 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,17 @@
RELEASE_TYPE: minor

A classic error when testing is to write a test function that can never fail,
even on inputs that aren't allowed or manually provided. By analogy to the
design pattern of::

@pytest.mark.parametrize("arg", [
..., # passing examples
pytest.param(..., marks=[pytest.mark.xfail]) # expected-failing input
])

we now support :obj:`@example(...).xfail() <hypothesis.example.xfail>`, with
the same (optional) ``condition``, ``reason``, and ``raises`` arguments as
``pytest.mark.xfail()``.

Naturally you can also write ``.via(...).xfail(...)``, or ``.xfail(...).via(...)``,
if you wish to note the provenance of expected-failing examples.
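For illustration, here is a minimal usage sketch of the new decorator — the test
and the ``allocate`` function are invented for this example, assuming code that
rejects negative sizes with ``ValueError``::

    from hypothesis import example, given, strategies as st

    def allocate(size: int) -> bytearray:
        # Hypothetical function under test: rejects negative sizes.
        if size < 0:
            raise ValueError("size must be non-negative")
        return bytearray(size)

    @example(0)  # boundary case, expected to pass
    @example(-1).xfail(reason="negative sizes are invalid", raises=ValueError)
    @given(st.integers(min_value=0, max_value=100))
    def test_allocate_returns_buffer_of_requested_size(size):
        assert len(allocate(size)) == size

On Python <= 3.8 the chained ``@example(...).xfail(...)`` form needs the
identity-function hack shown in the tests below, since PEP 614 only relaxed the
decorator grammar in Python 3.9.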
2 changes: 2 additions & 0 deletions hypothesis-python/docs/reproducing.rst
@@ -76,6 +76,8 @@ Either are fine, and you can use one in one example and the other in another
example if for some reason you really want to, but a single example must be
consistent.

.. automethod:: hypothesis.example.xfail

.. automethod:: hypothesis.example.via

.. _reproducing-with-seed:
103 changes: 94 additions & 9 deletions hypothesis-python/src/hypothesis/core.py
@@ -34,6 +34,8 @@
    Hashable,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
    overload,
@@ -92,10 +94,12 @@
    get_signature,
    impersonate,
    is_mock,
    nicerepr,
    proxies,
    repr_call,
)
from hypothesis.internal.scrutineer import Tracer, explanatory_lines
from hypothesis.internal.validation import check_type
from hypothesis.reporting import (
    current_verbosity,
    report,
@@ -134,6 +138,9 @@
class Example:
    args = attr.ib()
    kwargs = attr.ib()
    # Plus two optional arguments for .xfail()
    raises = attr.ib(default=None)
    reason = attr.ib(default=None)


class example:
@@ -156,6 +163,49 @@ def __call__(self, test: TestFunc) -> TestFunc:
        test.hypothesis_explicit_examples.append(self._this_example)  # type: ignore
        return test

    def xfail(
        self,
        condition: bool = True,
        *,
        reason: str = "",
        raises: Union[
            Type[BaseException], Tuple[Type[BaseException], ...]
        ] = BaseException,
    ) -> "example":
        """Mark this example as an expected failure, like pytest.mark.xfail().

        Expected-failing examples allow you to check that your test does fail on
        some examples, and therefore build confidence that *passing* tests are
        because your code is working, not because the test is missing something.

        .. code-block:: python

            @example(...).xfail()
            @example(...).xfail(reason="Prices must be non-negative")
            @example(...).xfail(raises=(KeyError, ValueError))
            @example(...).xfail(sys.version_info[:2] >= (3, 9), reason="needs py39+")
            @example(...).xfail(condition=sys.platform != "linux", raises=OSError)
            def test(x):
                pass
        """
        check_type(bool, condition, "condition")
        check_type(str, reason, "reason")
        if not (
            isinstance(raises, type) and issubclass(raises, BaseException)
        ) and not (
            isinstance(raises, tuple)
            and raises  # () -> expected to fail with no error, which is impossible
            and all(
                isinstance(r, type) and issubclass(r, BaseException) for r in raises
            )
        ):
            raise InvalidArgument(
                f"raises={raises!r} must be an exception type or tuple of exception types"
            )
        if condition:
            self._this_example = attr.evolve(
                self._this_example, raises=raises, reason=reason
            )
        return self

    def via(self, *whence: str) -> "example":
        """Attach a machine-readable label noting whence this example came.

@@ -400,9 +450,7 @@ def draw(self, strategy):
        assert self.__draws == 0
        self.__draws += 1
        # The main strategy for given is always a tuples strategy that returns
        # first positional arguments then keyword arguments.
        return self.__args, self.__kwargs


@@ -414,6 +462,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
    ]

    for example in reversed(getattr(wrapped_test, "hypothesis_explicit_examples", ())):
        assert isinstance(example, Example)
        # All of this validation is to check that @example() got "the same" arguments
        # as @given, i.e. corresponding to the same parameters, even though they might
        # be any mixture of positional and keyword arguments.
@@ -455,12 +504,47 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
        with local_settings(state.settings):
            fragments_reported = []
            try:
                adata = ArtificialDataForExample(arguments, example_kwargs)
                bits = ", ".join(nicerepr(x) for x in arguments) + ", ".join(
                    f"{k}={nicerepr(v)}" for k, v in example_kwargs.items()
                )
                with with_reporter(fragments_reported.append):
                    if example.raises is None:
                        state.execute_once(adata, is_final=True, print_example=True)
                    else:
                        # @example(...).xfail(...)
                        try:
                            state.execute_once(adata, is_final=True, print_example=True)
                        except failure_exceptions_to_catch() as err:
                            if not isinstance(err, example.raises):
                                raise
                        except example.raises as err:
                            # We'd usually check this as early as possible, but it's
                            # possible for failure_exceptions_to_catch() to grow when
                            # e.g. pytest is imported between import- and test-time.
                            raise InvalidArgument(
                                f"@example({bits}) raised an expected {err!r}, "
                                "but Hypothesis does not treat this as a test failure"
                            ) from err
                        else:
                            # Unexpectedly passing; always raise an error in this case.
                            reason = f" because {example.reason}" * bool(example.reason)
                            if example.raises is BaseException:
                                name = "exception"  # special-case no raises= arg
                            elif not isinstance(example.raises, tuple):
                                name = example.raises.__name__
                            elif len(example.raises) == 1:
                                name = example.raises[0].__name__
                            else:
                                name = (
                                    ", ".join(ex.__name__ for ex in example.raises[:-1])
                                    + f", or {example.raises[-1].__name__}"
                                )
                            vowel = name.upper()[0] in "AEIOU"
                            raise AssertionError(
                                f"Expected a{'n' * vowel} {name} from @example({bits})"
                                f"{reason}, but no exception was raised."
                            )
            except UnsatisfiedAssumption:
                # Odd though it seems, we deliberately support explicit examples that
                # are then rejected by a call to `assume()`. As well as iterative
@@ -478,7 +562,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
                # One user error - whether misunderstanding or typo - we've seen a few
                # times is to pass strategies to @example() where values are expected.
                # Checking is easy, and false-positives not much of a problem, so:
                if isinstance(err, failure_exceptions_to_catch()) and any(
                    isinstance(arg, SearchStrategy)
                    for arg in example.args + tuple(example.kwargs.values())
                ):
@@ -494,6 +578,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
                if (
                    state.settings.report_multiple_bugs
                    and pytest_shows_exceptiongroups
                    and isinstance(err, failure_exceptions_to_catch())
                    and not isinstance(err, skip_exceptions_to_reraise())
                ):
                    continue
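The ``UnsatisfiedAssumption`` branch above deliberately lets an explicit example
be rejected by ``assume()`` without failing the test. A minimal sketch of that
behaviour, written against the public API (the test itself is invented here)::

    from hypothesis import assume, example, given, strategies as st

    @example(0)  # deliberately rejected by the assume() below -- not an error
    @given(st.integers())
    def test_nonzero_values_have_positive_magnitude(x):
        assume(x != 0)  # silently skips the explicit example x=0
        assert abs(x) > 0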
4 changes: 2 additions & 2 deletions hypothesis-python/tests/common/utils.py
@@ -89,7 +89,7 @@ class ExcInfo:
    pass


def fails_with(e, *, match=None):
    def accepts(f):
        @proxies(f)
        def inverted_test(*arguments, **kwargs):
@@ -98,7 +98,7 @@ def inverted_test(*arguments, **kwargs):
            # the `raises` context manager so that any problems in rigging the
            # PRNG don't accidentally count as the expected failure.
            with deterministic_PRNG():
                with raises(e, match=match):
                    f(*arguments, **kwargs)

        return inverted_test
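The new ``match=`` argument is forwarded to ``pytest.raises``, which applies
``re.search`` against the string representation of the exception — so the
patterns in the tests below only need to match a substring. A standalone sketch
of that semantics (demo test, not part of the diff)::

    import pytest

    def test_match_uses_re_search():
        # match= is a regex searched within str(exception), not a full match,
        # so regex metacharacters in the expected message must be escaped.
        with pytest.raises(ZeroDivisionError, match=r"division by zero"):
            1 / 0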
98 changes: 98 additions & 0 deletions hypothesis-python/tests/cover/test_example.py
@@ -87,6 +87,14 @@ def test_interactive_example_does_not_emit_warning():
    child.sendline("quit(code=0)")


@fails_with(KeyboardInterrupt)
@example(1)
@example(2)
@given(st.none())
def test_raises_keyboardinterrupt_immediately(_):
    raise KeyboardInterrupt


def identity(decorator):
    # The "identity function hack" from https://peps.python.org/pep-0614/
    # Method-chaining decorators are otherwise a syntax error in Python <= 3.8
@@ -104,3 +112,93 @@ def test_invalid_example_via():
        example(x=False).via(100)  # not a string!
    with pytest.raises(TypeError):
        example(x=False).via("abc", "def")  # too many args


@pytest.mark.parametrize(
    "kw",
    [
        {"condition": None},  # must be a bool
        {"reason": None},  # must be a string
        {"raises": None},  # not a BaseException (or even a type)
        {"raises": int},  # not a BaseException
        {"raises": [Exception]},  # not a tuple
        {"raises": (None,)},  # tuple containing a non-BaseException
        {"raises": ()},  # empty tuple doesn't make sense here
        # raising non-failure exceptions, eg KeyboardInterrupt, is tested below
    ],
    ids=repr,
)
def test_invalid_example_xfail_arguments(kw):
    with pytest.raises(InvalidArgument):
        example(x=False).xfail(**kw)


@identity(example(True).xfail())
@identity(example(True).xfail(reason="ignored for passing tests"))
@identity(example(True).xfail(raises=KeyError))
@identity(example(True).xfail(raises=(KeyError, ValueError)))
@identity(example(True).xfail(True, reason="..."))
@identity(example(False).xfail(condition=False))
@given(st.none())
def test_many_xfail_example_decorators(fails):
    if fails:
        raise KeyError


@fails_with(AssertionError)
@identity(example(x=True).xfail(raises=KeyError))
@given(st.none())
def test_xfail_reraises_non_specified_exception(x):
    assert not x


@fails_with(
    InvalidArgument,
    match=r"@example\(x=True\) raised an expected BaseException\('msg'\), "
    r"but Hypothesis does not treat this as a test failure",
)
@identity(example(True).xfail())
@given(st.none())
def test_must_raise_a_failure_exception(x):
    if x:
        raise BaseException("msg")


@fails_with(
    AssertionError,
    match=r"Expected an exception from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail())
@given(st.none())
def test_error_on_unexpected_pass_base(x):
    pass


@fails_with(
    AssertionError,
    match=r"Expected an AssertionError from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail(raises=AssertionError))
@given(st.none())
def test_error_on_unexpected_pass_single(x):
    pass


@fails_with(
    AssertionError,
    match=r"Expected an AssertionError from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail(raises=(AssertionError,)))
@given(st.none())
def test_error_on_unexpected_pass_single_elem_tuple(x):
    pass


@fails_with(
    AssertionError,
    match=r"Expected a KeyError, or ValueError from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail(raises=(KeyError, ValueError)))
@given(st.none())
def test_error_on_unexpected_pass_multi(x):
    pass
8 changes: 5 additions & 3 deletions hypothesis-python/tests/nocover/test_integer_ranges.py
@@ -10,7 +10,7 @@

import pytest

from hypothesis import given, settings
from hypothesis.internal.conjecture.utils import integer_range
from hypothesis.strategies import integers
from hypothesis.strategies._internal.strategies import SearchStrategy
@@ -47,14 +47,16 @@ def test_intervals_shrink_to_center(lower_center_upper):
def test_bounded_integers_distribution_of_bit_width_issue_1387_regression():
    values = []

    @settings(database=None, max_examples=1000)
    @given(integers(0, 1e100))
    def test(x):
        if 2 <= x <= int(1e100) - 2:  # skip forced-endpoints
            values.append(x)

    test()

    # We draw from a shaped distribution up to 128bit ~7/8 of the time, and
    # uniformly the rest. So we should get some very large but not too many.
    huge = sum(x > 1e97 for x in values)
    assert huge != 0 or len(values) < 800
    assert huge <= 0.3 * len(values)  # expected ~1/8
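A back-of-envelope check of the bounds asserted above, assuming the ~1/8
uniform share stated in the comment and treating shaped draws as never huge (an
approximation for illustration, not the real generator)::

    n_examples = 1000
    p_uniform = 1 / 8  # per the comment: shaped ~7/8 of the time, else uniform
    # A uniform draw over [0, 1e100] exceeds 1e97 with probability 1 - 1e-3:
    p_huge_given_uniform = 1 - 1e97 / 1e100
    expected_huge = n_examples * p_uniform * p_huge_given_uniform
    print(expected_huge)               # ~124.9, so "huge != 0" is near-certain
    print(expected_huge / n_examples)  # ~0.125, comfortably under the 0.3 bound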