Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is a minimal proof-of-concept for predicate rewriting, sufficient to demonstrate that it works without breaking anything else. Equally importantly, it adds clear hooks for future work to allow open collaboration.
- Loading branch information
Showing
7 changed files
with
288 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
RELEASE_TYPE: patch | ||
|
||
This release lays the groundwork for automatic rewriting of simple filters, | ||
for example converting ``integers().filter(lambda x: x > 9)`` to | ||
``integers(min_value=10)``. | ||
|
||
Note that this is **not supported yet**, and we will continue to recommend | ||
writing the efficient form directly wherever possible - predicate rewriting | ||
is provided mainly for the benefit of downstream libraries which would | ||
otherwise have to implement it for themselves (e.g. :pypi:`pandera` and | ||
:pypi:`icontract-hypothesis`). See :issue:`2701` for details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# This file is part of Hypothesis, which may be found at | ||
# https://github.com/HypothesisWorks/hypothesis/ | ||
# | ||
# Most of this work is copyright (C) 2013-2021 David R. MacIver | ||
# (david@drmaciver.com), but it contains contributions by others. See | ||
# CONTRIBUTING.rst for a full list of people who may hold copyright, and | ||
# consult the git log if you need to determine who owns an individual | ||
# contribution. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public License, | ||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||
# obtain one at https://mozilla.org/MPL/2.0/. | ||
# | ||
# END HEADER | ||
|
||
"""Tools for understanding predicates, to satisfy them by construction. | ||
For example:: | ||
integers().filter(lamda x: x >= 0) -> integers(min_value=0) | ||
This is intractable in general, but reasonably easy for simple cases involving | ||
numeric bounds, strings with length or regex constraints, and collection lengths - | ||
and those are precisely the most common cases. When they arise in e.g. Pandas | ||
dataframes, it's also pretty painful to do the constructive version by hand in | ||
a library; so we prefer to share all the implementation effort here. | ||
See https://github.com/HypothesisWorks/hypothesis/issues/2701 for details. | ||
""" | ||
|
||
import operator | ||
from decimal import Decimal | ||
from fractions import Fraction | ||
from functools import partial | ||
from typing import Any, Callable, Mapping, Optional, Tuple, TypeVar | ||
|
||
from hypothesis.internal.compat import ceil, floor | ||
|
||
Ex = TypeVar("Ex") | ||
Predicate = Callable[[Ex], bool] | ||
|
||
ConstructivePredicate = Tuple[Mapping[str, Any], Optional[Predicate]] | ||
"""Return kwargs to the appropriate strategy, and the predicate if needed. | ||
For example:: | ||
integers().filter(lambda x: x >= 0) | ||
-> {"min_value": 0"}, None | ||
integers().filter(lambda x: x >= 0 and x % 7) | ||
-> {"min_value": 0"}, lambda x: x % 7 | ||
At least in principle - for now we usually return the predicate unchanged | ||
if needed. | ||
We have a separate get-predicate frontend for each "group" of strategies; e.g. | ||
for each numeric type, for strings, for bytes, for collection sizes, etc. | ||
""" | ||
|
||
|
||
def get_numeric_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: | ||
"""Shared logic for understanding numeric bounds. | ||
We then specialise this in the other functions below, to ensure that e.g. | ||
all the values are representable in the types that we're planning to generate | ||
so that the strategy validation doesn't complain. | ||
""" | ||
if ( | ||
type(predicate) is partial | ||
and len(predicate.args) == 1 | ||
and not predicate.keywords | ||
): | ||
arg = predicate.args[0] | ||
if (isinstance(arg, Decimal) and Decimal.is_snan(arg)) or not isinstance( | ||
arg, (int, float, Fraction, Decimal) | ||
): | ||
return {}, predicate | ||
options = { | ||
# We're talking about op(arg, x) - the reverse of our usual intuition! | ||
operator.lt: {"min_value": arg, "exclude_min": True}, # lambda x: arg < x | ||
operator.le: {"min_value": arg}, # lambda x: arg <= x | ||
operator.eq: {"min_value": arg, "max_value": arg}, # lambda x: arg == x | ||
operator.ge: {"max_value": arg}, # lambda x: arg >= x | ||
operator.gt: {"max_value": arg, "exclude_max": True}, # lambda x: arg > x | ||
} | ||
if predicate.func in options: | ||
return options[predicate.func], None | ||
|
||
# TODO: handle lambdas by AST analysis | ||
|
||
return {}, predicate | ||
|
||
|
||
def get_integer_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: | ||
kwargs, predicate = get_numeric_predicate_bounds(predicate) | ||
|
||
if "min_value" in kwargs: | ||
if kwargs["min_value"] != int(kwargs["min_value"]): | ||
kwargs["min_value"] = ceil(kwargs["min_value"]) | ||
elif kwargs.get("exclude_min", False): | ||
kwargs["min_value"] = int(kwargs["min_value"]) + 1 | ||
if "max_value" in kwargs: | ||
if kwargs["max_value"] != int(kwargs["max_value"]): | ||
kwargs["max_value"] = floor(kwargs["max_value"]) | ||
elif kwargs.get("exclude_max", False): | ||
kwargs["max_value"] = int(kwargs["max_value"]) - 1 | ||
|
||
kwargs = {k: v for k, v in kwargs.items() if k in {"min_value", "max_value"}} | ||
return kwargs, predicate |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# This file is part of Hypothesis, which may be found at | ||
# https://github.com/HypothesisWorks/hypothesis/ | ||
# | ||
# Most of this work is copyright (C) 2013-2021 David R. MacIver | ||
# (david@drmaciver.com), but it contains contributions by others. See | ||
# CONTRIBUTING.rst for a full list of people who may hold copyright, and | ||
# consult the git log if you need to determine who owns an individual | ||
# contribution. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public License, | ||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||
# obtain one at https://mozilla.org/MPL/2.0/. | ||
# | ||
# END HEADER | ||
|
||
import operator | ||
from functools import partial | ||
import decimal | ||
|
||
import pytest | ||
|
||
from hypothesis import given, strategies as st | ||
from hypothesis.errors import Unsatisfiable | ||
from hypothesis.strategies._internal.lazy import LazyStrategy | ||
from hypothesis.strategies._internal.numbers import BoundedIntStrategy | ||
from hypothesis.strategies._internal.strategies import FilteredStrategy | ||
|
||
from tests.common.utils import fails_with | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"strategy, predicate, start, end", | ||
[ | ||
# Integers with integer bounds | ||
(st.integers(1, 5), partial(operator.lt, 3), 4, 5), # lambda x: 3 < x | ||
(st.integers(1, 5), partial(operator.le, 3), 3, 5), # lambda x: 3 <= x | ||
(st.integers(1, 5), partial(operator.eq, 3), 3, 3), # lambda x: 3 == x | ||
(st.integers(1, 5), partial(operator.ge, 3), 1, 3), # lambda x: 3 >= x | ||
(st.integers(1, 5), partial(operator.gt, 3), 1, 2), # lambda x: 3 > x | ||
# Integers with non-integer bounds | ||
(st.integers(1, 5), partial(operator.lt, 3.5), 4, 5), | ||
(st.integers(1, 5), partial(operator.le, 3.5), 4, 5), | ||
(st.integers(1, 5), partial(operator.ge, 3.5), 1, 3), | ||
(st.integers(1, 5), partial(operator.gt, 3.5), 1, 3), | ||
], | ||
) | ||
@given(data=st.data()) | ||
def test_filter_rewriting(data, strategy, predicate, start, end): | ||
s = strategy.filter(predicate) | ||
assert isinstance(s, LazyStrategy) | ||
assert isinstance(s.wrapped_strategy, BoundedIntStrategy) | ||
assert s.wrapped_strategy.start == start | ||
assert s.wrapped_strategy.end == end | ||
value = data.draw(s) | ||
assert predicate(value) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"s", | ||
[ | ||
st.integers(1, 5).filter(partial(operator.lt, 6)), | ||
st.integers(1, 5).filter(partial(operator.eq, 3.5)), | ||
st.integers(1, 5).filter(partial(operator.eq, "can't compare to strings")), | ||
st.integers(1, 5).filter(partial(operator.ge, 0)), | ||
], | ||
) | ||
@fails_with(Unsatisfiable) | ||
@given(data=st.data()) | ||
def test_rewrite_unsatisfiable_filter(data, s): | ||
data.draw(s) | ||
|
||
|
||
def test_rewriting_does_not_compare_decimal_snan(): | ||
s = st.integers(1, 5).filter(partial(operator.eq, decimal.Decimal("snan"))) | ||
s.wrapped_strategy | ||
with pytest.raises(decimal.InvalidOperation): | ||
s.example() | ||
|
||
|
||
def mod2(x): | ||
return x % 2 | ||
|
||
|
||
@given( | ||
data=st.data(), | ||
predicates=st.permutations( | ||
[ | ||
partial(operator.lt, 1), | ||
partial(operator.le, 2), | ||
partial(operator.ge, 4), | ||
partial(operator.gt, 5), | ||
mod2, | ||
] | ||
), | ||
) | ||
def test_rewrite_filter_chains_with_some_unhandled(data, predicates): | ||
# Set up our strategy | ||
s = st.integers(1, 5) | ||
for p in predicates: | ||
s = s.filter(p) | ||
|
||
# Whatever value we draw is in fact valid for these strategies | ||
value = data.draw(s) | ||
for p in predicates: | ||
assert p(value), f"p={p!r}, value={value}" | ||
|
||
# No matter the order of the filters, we get the same resulting structure | ||
unwrapped = s.wrapped_strategy | ||
assert isinstance(unwrapped, FilteredStrategy) | ||
assert isinstance(unwrapped.filtered_strategy, BoundedIntStrategy) | ||
assert unwrapped.flat_conditions == (mod2,) |