Skip to content

Commit

Permalink
Merge pull request #3923 from tybug/more-shrinker-ir
Browse files Browse the repository at this point in the history
Migrate `reorder_examples` to the IR
  • Loading branch information
tybug committed Mar 18, 2024
2 parents 2129503 + b40999d commit 40daade
Show file tree
Hide file tree
Showing 11 changed files with 254 additions and 71 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

This patch continues our work on refactoring shrinker internals (:issue:`3921`).
107 changes: 90 additions & 17 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,14 @@ def end(self) -> int:
"""
return self.owner.ends[self.index]

@property
def ir_start(self) -> int:
    """Return this example's entry in the owner's ``ir_starts`` list.

    The value is looked up at ``self.index``; presumably it is the index of
    the first IR node drawn inside this example (confirm against the
    property that populates ``ir_starts``).
    """
    return self.owner.ir_starts[self.index]

@property
def ir_end(self) -> int:
    """Return this example's entry in the owner's ``ir_ends`` list.

    The value is looked up at ``self.index``; presumably it is the index one
    past the last IR node drawn inside this example (confirm against the
    property that populates ``ir_ends``).
    """
    return self.owner.ir_ends[self.index]

@property
def depth(self):
"""Depth of this example in the example tree. The top-level example has a
Expand Down Expand Up @@ -529,6 +537,32 @@ def starts(self) -> IntList:
def ends(self) -> IntList:
return self.starts_and_ends[1]

class _ir_starts_and_ends(ExampleProperty):
    """Record the IR node count at the start and stop of every example.

    ``self.examples`` and ``self.ir_node_count`` are not defined here;
    presumably they are maintained by ``ExampleProperty`` while the example
    trail is replayed (TODO confirm).
    """

    def begin(self) -> None:
        # One slot per example, filled in by the start/stop hooks below.
        self.starts = IntList.of_length(len(self.examples))
        self.ends = IntList.of_length(len(self.examples))

    def start_example(self, i: int, label_index: int) -> None:
        # The node count at entry is stored as the start for example ``i``.
        self.starts[i] = self.ir_node_count

    def stop_example(self, i: int, *, discarded: bool) -> None:
        # The node count at exit is stored as the end for example ``i``.
        self.ends[i] = self.ir_node_count

    def finish(self) -> Tuple[IntList, IntList]:
        """Return the ``(starts, ends)`` pair collected during the replay."""
        return (self.starts, self.ends)

ir_starts_and_ends: "Tuple[IntList, IntList]" = calculated_example_property(
_ir_starts_and_ends
)

@property
def ir_starts(self) -> IntList:
    """Return the first element (the starts) of ``ir_starts_and_ends``."""
    return self.ir_starts_and_ends[0]

@property
def ir_ends(self) -> IntList:
    """Return the second element (the ends) of ``ir_starts_and_ends``."""
    return self.ir_starts_and_ends[1]

class _discarded(ExampleProperty):
def begin(self) -> None:
self.result: "Set[int]" = set() # type: ignore # IntList in parent class
Expand Down Expand Up @@ -910,7 +944,7 @@ def draw_boolean(
pass


@attr.s(slots=True)
@attr.s(slots=True, repr=False, eq=False)
class IRNode:
ir_type: IRTypeName = attr.ib()
value: IRType = attr.ib()
Expand All @@ -928,6 +962,22 @@ def copy(self, *, with_value: IRType) -> "IRNode":
was_forced=self.was_forced,
)

def __eq__(self, other):
    """Compare two IR nodes field by field.

    Returns ``NotImplemented`` for anything that is not an ``IRNode`` so
    Python can fall back to the other operand's comparison. Values and
    kwargs are compared through ``ir_value_equal`` / ``ir_kwargs_equal``
    rather than plain ``==``, so float nodes get the float-specific
    comparison those helpers implement.

    NOTE(review): a class that defines ``__eq__`` without ``__hash__`` is
    unhashable by default -- confirm IRNode instances never need to be
    hashed (set members / dict keys).
    """
    if not isinstance(other, IRNode):
        return NotImplemented

    return (
        self.ir_type == other.ir_type
        and ir_value_equal(self.ir_type, self.value, other.value)
        and ir_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
        and self.was_forced == other.was_forced
    )

def __repr__(self):
    """Return ``"<ir_type> <value repr>[ [forced]] <kwargs repr>"``.

    The `` [forced]`` marker is present only when ``was_forced`` is truthy.
    """
    # repr to avoid "BytesWarning: str() on a bytes instance" for bytes nodes
    forced_marker = " [forced]" if self.was_forced else ""
    return f"{self.ir_type} {self.value!r}{forced_marker} {self.kwargs!r}"


def ir_value_permitted(value, ir_type, kwargs):
if ir_type == "integer":
Expand Down Expand Up @@ -962,6 +1012,24 @@ def ir_value_permitted(value, ir_type, kwargs):
raise NotImplementedError(f"unhandled type {type(value)} of ir value {value}")


def ir_value_equal(ir_type, v1, v2):
    """Return whether two IR values of the given ``ir_type`` are equal.

    Every type except ``"float"`` uses ordinary ``==``. Floats are compared
    through ``float_to_int`` instead; presumably that maps a float to its
    bit pattern, so nan compares equal to itself and ``0.0`` differs from
    ``-0.0`` (TODO confirm against ``float_to_int``).
    """
    if ir_type != "float":
        return v1 == v2
    return float_to_int(v1) == float_to_int(v2)


def ir_kwargs_equal(ir_type, kwargs1, kwargs2):
    """Return whether two kwargs dicts for the given ``ir_type`` are equal.

    Every type except ``"float"`` uses ordinary dict ``==``. For floats the
    ``min_value`` and ``max_value`` bounds are compared through
    ``float_to_int`` (the same float comparison ``ir_value_equal`` uses),
    while ``allow_nan`` and ``smallest_nonzero_magnitude`` use ``==``.
    Only those four keys are inspected for float kwargs.
    """
    if ir_type != "float":
        return kwargs1 == kwargs2
    return (
        float_to_int(kwargs1["min_value"]) == float_to_int(kwargs2["min_value"])
        and float_to_int(kwargs1["max_value"]) == float_to_int(kwargs2["max_value"])
        and kwargs1["allow_nan"] == kwargs2["allow_nan"]
        and kwargs1["smallest_nonzero_magnitude"]
        == kwargs2["smallest_nonzero_magnitude"]
    )


@dataclass_transform()
@attr.s(slots=True)
class ConjectureResult:
Expand Down Expand Up @@ -1876,9 +1944,10 @@ def draw_integer(

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("integer", kwargs)
assert isinstance(node.value, int)
forced = node.value
fake_forced = not node.was_forced
if forced is None:
assert isinstance(node.value, int)
forced = node.value
fake_forced = True

value = self.provider.draw_integer(
**kwargs, forced=forced, fake_forced=fake_forced
Expand Down Expand Up @@ -1932,9 +2001,10 @@ def draw_float(

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("float", kwargs)
assert isinstance(node.value, float)
forced = node.value
fake_forced = not node.was_forced
if forced is None:
assert isinstance(node.value, float)
forced = node.value
fake_forced = True

value = self.provider.draw_float(
**kwargs, forced=forced, fake_forced=fake_forced
Expand Down Expand Up @@ -1973,9 +2043,10 @@ def draw_string(
)
if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("string", kwargs)
assert isinstance(node.value, str)
forced = node.value
fake_forced = not node.was_forced
if forced is None:
assert isinstance(node.value, str)
forced = node.value
fake_forced = True

value = self.provider.draw_string(
**kwargs, forced=forced, fake_forced=fake_forced
Expand Down Expand Up @@ -2008,9 +2079,10 @@ def draw_bytes(

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("bytes", kwargs)
assert isinstance(node.value, bytes)
forced = node.value
fake_forced = not node.was_forced
if forced is None:
assert isinstance(node.value, bytes)
forced = node.value
fake_forced = True

value = self.provider.draw_bytes(
**kwargs, forced=forced, fake_forced=fake_forced
Expand Down Expand Up @@ -2049,9 +2121,10 @@ def draw_boolean(

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("boolean", kwargs)
assert isinstance(node.value, bool)
forced = node.value
fake_forced = not node.was_forced
if forced is None:
assert isinstance(node.value, bool)
forced = node.value
fake_forced = True

value = self.provider.draw_boolean(
**kwargs, forced=forced, fake_forced=fake_forced
Expand Down Expand Up @@ -2113,7 +2186,7 @@ def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode
# that is allowed by the expected kwargs, then we can coerce this node
# into an aligned one by using its value. It's unclear how useful this is.
if not ir_value_permitted(node.value, node.ir_type, kwargs):
self.mark_invalid() # pragma: no cover # FIXME @tybug
self.mark_invalid()

return node

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,11 @@ def __stoppable_test_function(self, data):
# correct engine.
raise

def ir_tree_to_data(self, ir_tree_nodes):
    """Run the test function on an IR tree and return the resulting data.

    Builds a ``ConjectureData`` with ``ConjectureData.for_ir_tree`` and
    passes it to the private stoppable test function directly, not to
    ``test_function``, so whatever bookkeeping ``test_function`` does is
    not performed here.
    """
    data = ConjectureData.for_ir_tree(ir_tree_nodes)
    self.__stoppable_test_function(data)
    return data

def test_function(self, data):
if self.__pending_call_explanation is not None:
self.debug(self.__pending_call_explanation)
Expand Down Expand Up @@ -316,8 +321,7 @@ def test_function(self, data):

# drive the ir tree through the test function to convert it
# to a buffer
data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes)
self.__stoppable_test_function(data)
data = self.ir_tree_to_data(data.examples.ir_tree_nodes)
self.__data_cache[data.buffer] = data.as_result()

key = data.interesting_origin
Expand Down
23 changes: 11 additions & 12 deletions hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@

ARRAY_CODES = ["B", "H", "I", "L", "Q", "O"]

T = TypeVar("T")


def array_or_list(
code: str, contents: Iterable[int]
Expand All @@ -45,25 +47,25 @@ def array_or_list(


def replace_all(
buffer: Sequence[int],
replacements: Iterable[Tuple[int, int, Sequence[int]]],
) -> bytes:
"""Substitute multiple replacement values into a buffer.
ls: Sequence[T],
replacements: Iterable[Tuple[int, int, Sequence[T]]],
) -> List[T]:
"""Substitute multiple replacement values into a list.
Replacements is a list of (start, end, value) triples.
"""

result = bytearray()
result: List[T] = []
prev = 0
offset = 0
for u, v, r in replacements:
result.extend(buffer[prev:u])
result.extend(ls[prev:u])
result.extend(r)
prev = v
offset += len(r) - (v - u)
result.extend(buffer[prev:])
assert len(result) == len(buffer) + offset
return bytes(result)
result.extend(ls[prev:])
assert len(result) == len(ls) + offset
return result


NEXT_ARRAY_CODE = dict(zip(ARRAY_CODES, ARRAY_CODES[1:]))
Expand Down Expand Up @@ -190,9 +192,6 @@ def uniform(random: Random, n: int) -> bytes:
return random.getrandbits(n * 8).to_bytes(n, "big")


T = TypeVar("T")


class LazySequenceCopy:
"""A "copy" of a sequence that works by inserting a mask in front
of the underlying sequence, so that you can mutate it without changing
Expand Down
30 changes: 20 additions & 10 deletions hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,7 @@ def calls(self):
return self.engine.call_count

def consider_new_tree(self, tree):
data = ConjectureData.for_ir_tree(tree)
self.engine.test_function(data)
data = self.engine.ir_tree_to_data(tree)

return self.consider_new_buffer(data.buffer)

Expand Down Expand Up @@ -1413,20 +1412,31 @@ def test_not_equal(x, y):
ex = chooser.choose(self.examples)
label = chooser.choose(ex.children).label

group = [c for c in ex.children if c.label == label]
if len(group) <= 1:
examples = [c for c in ex.children if c.label == label]
if len(examples) <= 1:
return

st = self.shrink_target
pieces = [st.buffer[ex.start : ex.end] for ex in group]
endpoints = [(ex.start, ex.end) for ex in group]
endpoints = [(ex.ir_start, ex.ir_end) for ex in examples]

Ordering.shrink(
pieces,
lambda ls: self.consider_new_buffer(
replace_all(st.buffer, [(u, v, r) for (u, v), r in zip(endpoints, ls)])
range(len(examples)),
lambda indices: self.consider_new_tree(
replace_all(
st.examples.ir_nodes,
[
(
u,
v,
st.examples.ir_nodes[
examples[i].ir_start : examples[i].ir_end
],
)
for (u, v), i in zip(endpoints, indices)
],
)
),
random=self.random,
key=lambda i: st.buffer[examples[i].start : examples[i].end],
)

def run_block_program(self, i, description, original, repeats=1):
Expand Down
22 changes: 14 additions & 8 deletions hypothesis-python/tests/conjecture/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,18 @@ def draw_boolean_kwargs(draw, *, use_forced=False):
return {"p": p, "forced": forced}


def kwargs_strategy(ir_type):
    """Return the kwargs-drawing strategy for the given IR type name.

    Raises ``KeyError`` if ``ir_type`` is not one of ``"boolean"``,
    ``"integer"``, ``"float"``, ``"bytes"`` or ``"string"``.
    """
    # Map names to factories so only the requested strategy is built,
    # not all five on every call.
    factories = {
        "boolean": draw_boolean_kwargs,
        "integer": draw_integer_kwargs,
        "float": draw_float_kwargs,
        "bytes": draw_bytes_kwargs,
        "string": draw_string_kwargs,
    }
    return factories[ir_type]()


def ir_types_and_kwargs():
options = [
("boolean", draw_boolean_kwargs()),
("integer", draw_integer_kwargs()),
("float", draw_float_kwargs()),
("bytes", draw_bytes_kwargs()),
("string", draw_string_kwargs()),
]
return st.one_of(st.tuples(st.just(name), kws) for name, kws in options)
options = ["boolean", "integer", "float", "bytes", "string"]
return st.one_of(
st.tuples(st.just(name), kwargs_strategy(name)) for name in options
)
14 changes: 12 additions & 2 deletions hypothesis-python/tests/conjecture/test_dfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,16 @@

import pytest

from hypothesis import assume, example, given, note, reject, settings, strategies as st
from hypothesis import (
HealthCheck,
assume,
example,
given,
note,
reject,
settings,
strategies as st,
)
from hypothesis.internal.conjecture.dfa import DEAD, ConcreteDFA


Expand Down Expand Up @@ -112,7 +121,8 @@ def test_canonicalised_matches_same_strings(dfa, via_repr):
)


@settings(max_examples=20)
# This test filters out about 80% of examples, so the filter_too_much health
# check is suppressed. We should improve the filter rate at some point.
@settings(max_examples=20, suppress_health_check=[HealthCheck.filter_too_much])
@given(dfas())
def test_has_string_of_max_length(dfa):
length = dfa.max_length(dfa.start)
Expand Down

0 comments on commit 40daade

Please sign in to comment.