Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace MultipleFailures with ExceptionGroup on Python 3.11+ #3191

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,4 @@
RELEASE_TYPE: minor

This release converts our ``MultipleFailures`` reporting to use the new builtin
``ExceptionGroup`` type on Python 3.11, which is currently in alpha (:issue:`3175`).
123 changes: 69 additions & 54 deletions hypothesis-python/src/hypothesis/core.py
Expand Up @@ -803,50 +803,47 @@ def run_engine(self):
# The engine found one or more failures, so we need to reproduce and
# report them.

errors_to_report = []
flaky = 0

if runner.best_observed_targets:
for line in describe_targets(runner.best_observed_targets):
report(line)
report("")
report_lines = describe_targets(runner.best_observed_targets)
if report_lines:
report_lines.append("")

explanations = explanatory_lines(self.explain_traces, self.settings)
for falsifying_example in self.falsifying_examples:
info = falsifying_example.extra_information
fragments = []

ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
self.__was_flaky = False
assert info.__expected_exception is not None
try:
self.execute_once(
ran_example,
print_example=not self.is_find,
is_final=True,
expected_failure=(
info.__expected_exception,
info.__expected_traceback,
),
)
with with_reporter(fragments.append):
self.execute_once(
ran_example,
print_example=not self.is_find,
is_final=True,
expected_failure=(
info.__expected_exception,
info.__expected_traceback,
),
)
except (UnsatisfiedAssumption, StopTest) as e:
report(format_exception(e, e.__traceback__))
self.__flaky(
err = Flaky(
"Unreliable assumption: An example which satisfied "
"assumptions on the first run now fails it."
)
err.__cause__ = err.__context__ = e
fragments.append(format_exception(e, e.__traceback__))
errors_to_report.append((fragments, err))
except BaseException as e:
# If we have anything for explain-mode, this is the time to report.
for line in explanations[falsifying_example.interesting_origin]:
report(line)

if len(self.falsifying_examples) <= 1:
# There is only one failure, so we can report it by raising
# it directly.
raise

# We are reporting multiple failures, so we need to manually
# print each exception's stack trace and information.
tb = get_trimmed_traceback()
report(format_exception(e, tb))
fragments.append(line)
errors_to_report.append(
(fragments, e.with_traceback(get_trimmed_traceback()))
)

finally: # pragma: no cover
# Mostly useful for ``find`` and ensuring that objects that
Expand All @@ -864,27 +861,14 @@ def run_engine(self):
# you add a pragma: no cover to it!
# See https://github.com/nedbat/coveragepy/issues/623
if self.settings.print_blob:
report(
fragments.append(
"\nYou can reproduce this example by temporarily adding "
"@reproduce_failure(%r, %r) as a decorator on your test case"
% (__version__, encode_failure(falsifying_example.buffer))
)
if self.__was_flaky:
flaky += 1

# If we only have one example then we should have raised an error or
# flaky prior to this point.
assert len(self.falsifying_examples) > 1

if flaky > 0:
raise Flaky(
f"Hypothesis found {len(self.falsifying_examples)} distinct failures, "
f"but {flaky} of them exhibited some sort of flaky behaviour."
)
else:
raise MultipleFailures(
f"Hypothesis found {len(self.falsifying_examples)} distinct failures."
)
_raise_to_user(errors_to_report, self.settings, report_lines, flaky=flaky)

def __flaky(self, message):
if len(self.falsifying_examples) <= 1:
Expand All @@ -894,6 +878,45 @@ def __flaky(self, message):
report("Flaky example! " + message)


def _raise_to_user(errors_to_report, settings, target_lines, trailer="", flaky=None):
assert errors_to_report
if len(errors_to_report) == 1:
fragments, the_error_hypothesis_found = errors_to_report[0]
for line in target_lines + fragments:
report(line)
if flaky:
raise Flaky() from the_error_hypothesis_found
raise the_error_hypothesis_found

if not hasattr(Exception, "__note__"): # pragma: no branch # Python 3.11+
for line in target_lines:
report(line)

for fragments, err in errors_to_report:
if hasattr(err, "__note__"): # pragma: no cover # Python 3.11+
if fragments and settings.verbosity >= Verbosity.normal:
err.__note__ = (err.__note__ or "") + "\n" + "\n".join(fragments)
else:
with local_settings(settings):
for line in fragments:
report(line)
report(format_exception(err, err.__traceback__))

if flaky: # pragma: no cover # required for Python 3.11+
raise Flaky(
f"Hypothesis found {len(errors_to_report)} distinct failures, "
f"but {flaky} of them exhibited some sort of flaky behaviour."
)
error = MultipleFailures(
f"Hypothesis found {len(errors_to_report)} distinct failures{trailer}.",
[e for _, e in errors_to_report],
)
if hasattr(error, "__note__"): # pragma: no cover # Python 3.11+
if target_lines and settings.verbosity >= Verbosity.normal:
error.__note__ = (error.__note__ or "") + "\n" + "\n".join(target_lines)
raise error


@contextlib.contextmanager
def fake_subTest(self, msg=None, **__):
"""Monkeypatch for `unittest.TestCase.subTest` during `@given`.
Expand Down Expand Up @@ -1130,18 +1153,8 @@ def wrapped_test(*arguments, **kwargs):
# If we're not going to report multiple bugs, we would have
# stopped running explicit examples at the first failure.
assert state.settings.report_multiple_bugs
for fragments, err in errors:
for f in fragments:
report(f)
report(format_exception(err, err.__traceback__))
raise MultipleFailures(
f"Hypothesis found {len(errors)} failures in explicit examples."
)
elif errors:
fragments, the_error_hypothesis_found = errors[0]
for f in fragments:
report(f)
raise the_error_hypothesis_found
if errors:
_raise_to_user(errors, state.settings, [], " in explicit examples")

# If there were any explicit examples, they all ran successfully.
# The next step is to use the Conjecture engine to run the test on
Expand Down Expand Up @@ -1190,7 +1203,9 @@ def wrapped_test(*arguments, **kwargs):
# which will actually appear in tracebacks is as clear as
# possible - "raise the_error_hypothesis_found".
the_error_hypothesis_found = e.with_traceback(
get_trimmed_traceback()
None
if isinstance(e, MultipleFailures)
else get_trimmed_traceback()
)
raise the_error_hypothesis_found

Expand Down
17 changes: 14 additions & 3 deletions hypothesis-python/src/hypothesis/errors.py
Expand Up @@ -133,9 +133,20 @@ class Frozen(HypothesisException):
after freeze() has been called."""


class MultipleFailures(_Trimmable):
"""Indicates that Hypothesis found more than one distinct bug when testing
your code."""
try:
MultipleFailures = ExceptionGroup # type: ignore
except NameError: # pragma: no branch

class MultipleFailures(_Trimmable): # type: ignore
"""Indicates that Hypothesis found more than one distinct bug when testing
your code.

In Python 3.11+, this is an alias for the builtin ExceptionGroup type.
"""

def __init__(self, message, exceptions):
super().__init__(message)
self.exceptions = tuple(exceptions)


class DeadlineExceeded(_Trimmable):
Expand Down
45 changes: 29 additions & 16 deletions hypothesis-python/tests/cover/test_slippage.py
Expand Up @@ -15,7 +15,7 @@

import pytest

from hypothesis import Phase, assume, given, settings, strategies as st
from hypothesis import Phase, assume, given, settings, strategies as st, target
from hypothesis.database import InMemoryExampleDatabase
from hypothesis.errors import Flaky, MultipleFailures
from hypothesis.internal.conjecture.engine import MIN_TEST_CALLS
Expand All @@ -27,6 +27,17 @@
)


def capture_reports(test):
with capture_out() as o, pytest.raises(MultipleFailures) as err:
test()

if hasattr(Exception, "__note__"):
return "\n\n".join(
f"{e!r}\n{e.__note__ or ''}" for e in (err.value,) + err.value.exceptions
)
return o.getvalue()


def test_raises_multiple_failures_with_varying_type():
target = [None]

Expand All @@ -43,12 +54,20 @@ def test(i):
exc_class = TypeError if target[0] == i else ValueError
raise exc_class()

with capture_out() as o:
with pytest.raises(MultipleFailures):
test()
output = capture_reports(test)
assert "TypeError" in output
assert "ValueError" in output


assert "TypeError" in o.getvalue()
assert "ValueError" in o.getvalue()
def test_shows_target_scores_with_multiple_failures():
@settings(database=None, max_examples=100)
@given(st.integers())
def test(i):
target(i)
assert i > 0
assert i < 0

assert "Highest target score:" in capture_reports(test)


def test_raises_multiple_failures_when_position_varies():
Expand All @@ -66,11 +85,9 @@ def test(i):
else:
raise ValueError("loc 2")

with capture_out() as o:
with pytest.raises(MultipleFailures):
test()
assert "loc 1" in o.getvalue()
assert "loc 2" in o.getvalue()
output = capture_reports(test)
assert "loc 1" in output
assert "loc 2" in output


def test_replays_both_failing_values():
Expand Down Expand Up @@ -185,11 +202,7 @@ def test(i):
else:
duds.add(i)

with capture_out() as o:
with pytest.raises(MultipleFailures):
test()

output = o.getvalue()
output = capture_reports(test)
assert_output_contains_failure(output, test, i=10000)
assert_output_contains_failure(output, test, i=second_target[0])

Expand Down