From 2ec0e1dfef248b90935779ffb0f9eb2c7dfdea8b Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 15 Feb 2024 22:15:54 -0500 Subject: [PATCH 01/43] wip --- hypothesis-python/docs/strategies.rst | 33 ++++ hypothesis-python/setup.py | 2 + hypothesis-python/src/hypothesis/_settings.py | 31 +++- hypothesis-python/src/hypothesis/core.py | 48 +++-- .../hypothesis/internal/conjecture/data.py | 96 +++++++++- .../internal/conjecture/datatree.py | 17 +- .../hypothesis/internal/conjecture/engine.py | 33 +++- .../tests/conjecture/test_alt_backend.py | 166 ++++++++++++++++++ .../tests/cover/test_settings.py | 1 + .../recorded/hypothesis_module_magic.txt | 3 + 10 files changed, 390 insertions(+), 40 deletions(-) create mode 100644 hypothesis-python/tests/conjecture/test_alt_backend.py diff --git a/hypothesis-python/docs/strategies.rst b/hypothesis-python/docs/strategies.rst index e9006d37ca..c60bef9d3f 100644 --- a/hypothesis-python/docs/strategies.rst +++ b/hypothesis-python/docs/strategies.rst @@ -204,3 +204,36 @@ loading our pytest plugin from your ``conftest.py`` instead:: echo "pytest_plugins = ['hypothesis.extra.pytestplugin']\n" > tests/conftest.py pytest -p "no:hypothesispytest" ... + + +.. _alternative-backends: + +----------------------------------- +Alternative backends for Hypothesis +----------------------------------- + +.. warning:: + + EXPERIMENTAL AND UNSTABLE. + +The importable name of a backend which Hypothesis should use to generate primitive +types. We aim to support heuristic-random, solver-based, and fuzzing-based backends. + +See :issue:`3086` for details, e.g. if you're interested in writing your own backend. +(note that there is *no stable interface* for this; you'd be helping us work out +what that should eventually look like, and we're likely to make regular breaking +changes for some time to come) + +Using the prototype :pypi:`crosshair-tool` backend `via this schemathesis +`__, +a solver-backed test might look something like: + +.. code-block:: python + + from hypothesis import given, settings, strategies as st + + + @settings(backend="crosshair", database=None) + @given(st.integers()) + def test_needs_solver(x): + assert x != 123456789 diff --git a/hypothesis-python/setup.py b/hypothesis-python/setup.py index bb132c5f4d..cf29c2732a 100644 --- a/hypothesis-python/setup.py +++ b/hypothesis-python/setup.py @@ -71,6 +71,8 @@ def local_file(name): # We also leave the choice of timezone library to the user, since it # might be zoneinfo or pytz depending on version and configuration. 
"django": ["django>=3.2"], + # TODO: https://github.com/pschanely/hypothesis-crosshair/ extra once released + # "crosshair": ["hypothesis-crosshair-tool >= ???"], } extras["all"] = sorted(set(sum(extras.values(), []))) diff --git a/hypothesis-python/src/hypothesis/_settings.py b/hypothesis-python/src/hypothesis/_settings.py index b11ffc54a4..e4d9db27e2 100644 --- a/hypothesis-python/src/hypothesis/_settings.py +++ b/hypothesis-python/src/hypothesis/_settings.py @@ -165,6 +165,7 @@ def __init__( suppress_health_check: Collection["HealthCheck"] = not_set, # type: ignore deadline: Union[int, float, datetime.timedelta, None] = not_set, # type: ignore print_blob: bool = not_set, # type: ignore + backend: str = "hypothesis", ) -> None: if parent is not None: check_type(settings, parent, "parent") @@ -289,7 +290,13 @@ def __setattr__(self, name, value): raise AttributeError("settings objects are immutable") def __repr__(self): - bits = sorted(f"{name}={getattr(self, name)!r}" for name in all_settings) + from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS + + bits = sorted( + f"{name}={getattr(self, name)!r}" + for name in all_settings + if (name != "backend" or len(AVAILABLE_PROVIDERS) > 1) # experimental + ) return "settings({})".format(", ".join(bits)) def show_changed(self): @@ -706,6 +713,28 @@ def is_in_ci() -> bool: """, ) + +def _backend_validator(value): + from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS + + if value not in AVAILABLE_PROVIDERS: + msg = f"Invalid backend, {value!r}. Valid options: {sorted(AVAILABLE_PROVIDERS)!r}" + raise InvalidArgument(msg) + return value + + +settings._define_setting( + "backend", + default="hypothesis", + show_default=False, + validator=_backend_validator, + description=""" +EXPERIMENTAL AND UNSTABLE - see :ref:`alternative-backends`. +The importable name of a backend which Hypothesis should use to generate primitive +types. We aim to support heuristic-random, solver-based, and fuzzing-based backends. +""", +) + settings.lock_further_definitions() diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index e000da174f..75a100b031 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -758,6 +758,15 @@ def execute(data, function): return default_executor +# This is hacky solution gives the experimental Crosshair backend a way to wrap +# a context manager around all of the test cases generated for some test function, +# and _that_ context can return a function for a context manager which will be +# wrapped around each individual test case. It's ugly, but it works. +hacky_patchable_run_context_yielding_per_test_case_context = partial( + contextlib.nullcontext, enter_result=contextlib.nullcontext +) + + class StateForActualGivenExecution: def __init__(self, stuff, test, settings, random, wrapped_test): self.test_runner = get_executor(stuff.selfy) @@ -802,6 +811,7 @@ def execute_once( is_final=False, expected_failure=None, example_kwargs=None, + per_case_context_fn=contextlib.nullcontext, ): """Run the test function once, using ``data`` as input. @@ -933,9 +943,10 @@ def run(data): with local_settings(self.settings): with deterministic_PRNG(): with BuildContext(data, is_final=is_final) as context: - # Run the test function once, via the executor hook. - # In most cases this will delegate straight to `run(data)`. - result = self.test_runner(data, run) + with per_case_context_fn(): + # Run the test function once, via the executor hook. 
+ # In most cases this will delegate straight to `run(data)`. + result = self.test_runner(data, run) # If a failure was expected, it should have been raised already, so # instead raise an appropriate diagnostic error. @@ -965,7 +976,9 @@ def run(data): ) from exception return result - def _execute_once_for_engine(self, data: ConjectureData) -> None: + def _execute_once_for_engine( + self, data: ConjectureData, *, per_case_context_fn: Any + ) -> None: """Wrapper around ``execute_once`` that intercepts test failure exceptions and single-test control exceptions, and turns them into appropriate method calls to `data` instead. @@ -989,13 +1002,17 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None: # settrace() contention *not* by our coverage tests. Ah well. with Tracer() as tracer: try: - result = self.execute_once(data) + result = self.execute_once( + data, per_case_context_fn=per_case_context_fn + ) if data.status == Status.VALID: self.explain_traces[None].add(frozenset(tracer.branches)) finally: trace = tracer.branches else: - result = self.execute_once(data) + result = self.execute_once( + data, per_case_context_fn=per_case_context_fn + ) if result is not None: fail_health_check( self.settings, @@ -1086,15 +1103,16 @@ def run_engine(self): else: database_key = None - runner = self._runner = ConjectureRunner( - self._execute_once_for_engine, - settings=self.settings, - random=self.random, - database_key=database_key, - ) - # Use the Conjecture engine to run the test function many times - # on different inputs. - runner.run() + with hacky_patchable_run_context_yielding_per_test_case_context() as ctx_fn: + runner = ConjectureRunner( + partial(self._execute_once_for_engine, per_case_context_fn=ctx_fn), + settings=self.settings, + random=self.random, + database_key=database_key, + ) + # Use the Conjecture engine to run the test function many times + # on different inputs. + runner.run() note_statistics(runner.statistics) deliver_json_blob( { diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index cea40823be..bbfb864380 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -61,11 +61,14 @@ from hypothesis.internal.intervalsets import IntervalSet if TYPE_CHECKING: + from typing import TypeAlias + from typing_extensions import dataclass_transform from hypothesis.strategies import SearchStrategy from hypothesis.strategies._internal.strategies import Ex else: + TypeAlias = object def dataclass_transform(): def wrapper(tp): @@ -93,6 +96,42 @@ def wrapper(tp): T = TypeVar("T") +class IntegerKWargs(TypedDict): + min_value: Optional[int] + max_value: Optional[int] + weights: Optional[Sequence[float]] + shrink_towards: int + + +class FloatKWargs(TypedDict): + min_value: float + max_value: float + allow_nan: bool + smallest_nonzero_magnitude: float + + +class StringKWargs(TypedDict): + intervals: IntervalSet + min_size: int + max_size: Optional[int] + + +class BytesKWargs(TypedDict): + size: int + + +class BooleanKWargs(TypedDict): + p: float + + +IRType: TypeAlias = Union[int, str, bool, float, bytes] +IRKWargsType: TypeAlias = Union[ + IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs +] +# this would be "IRTypeType", but that's just confusing. 
+IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"] + + class ExtraInformation: """A class for holding shared state on a ``ConjectureData`` that should be added to the final ``ConjectureResult``.""" @@ -1425,21 +1464,39 @@ def permitted(f): return (sampler, forced_sign_bit, neg_clamper, pos_clamper, nasty_floats) +# The set of available `PrimitiveProvider`s, by name. Other libraries, such as +# crosshair, can implement this interface and add themselves; at which point users +# can configure which backend to use via settings. Keys are the name of the library, +# which doubles as the backend= setting, and values are importable class names. +# +# NOTE: this is a temporary interface. We DO NOT promise to continue supporting it! +# (but if you want to experiment and don't mind breakage, here you go) +AVAILABLE_PROVIDERS = { + "hypothesis": "hypothesis.internal.conjecture.data.PrimitiveProvider", +} + + class ConjectureData: @classmethod def for_buffer( cls, buffer: Union[List[int], bytes], + *, observer: Optional[DataObserver] = None, + provider: type = PrimitiveProvider, ) -> "ConjectureData": - return cls(len(buffer), buffer, random=None, observer=observer) + return cls( + len(buffer), buffer, random=None, observer=observer, provider=provider + ) def __init__( self, max_length: int, prefix: Union[List[int], bytes, bytearray], + *, random: Optional[Random], observer: Optional[DataObserver] = None, + provider: type = PrimitiveProvider, ) -> None: if observer is None: observer = DataObserver() @@ -1470,7 +1527,7 @@ def __init__( self.draw_times: "Dict[str, float]" = {} self.max_depth = 0 self.has_discards = False - self.provider = PrimitiveProvider(self) + self.provider = provider(self) self.__result: "Optional[ConjectureResult]" = None @@ -1504,7 +1561,6 @@ def __init__( self.extra_information = ExtraInformation() - self.start_example(TOP_LABEL) def __repr__(self): return "ConjectureData(%s, %d bytes%s)" % ( @@ -1567,6 +1623,7 @@ def draw_integer( self.observer.draw_integer( value, was_forced=forced is not None, kwargs=kwargs ) + self.record_ir_value("integer", value, kwargs) return value def draw_float( @@ -1604,6 +1661,7 @@ def draw_float( self.observer.draw_float( value, kwargs=kwargs, was_forced=forced is not None ) + self.record_ir_value("float", value, kwargs) return value def draw_string( @@ -1627,6 +1685,7 @@ def draw_string( self.observer.draw_string( value, kwargs=kwargs, was_forced=forced is not None ) + self.record_ir_value("string", value, kwargs) return value def draw_bytes( @@ -1646,6 +1705,7 @@ def draw_bytes( self.observer.draw_bytes( value, kwargs=kwargs, was_forced=forced is not None ) + self.record_ir_value("bytes", value, kwargs) return value def draw_boolean( @@ -1667,6 +1727,7 @@ def draw_boolean( self.observer.draw_boolean( value, kwargs=kwargs, was_forced=forced is not None ) + self.record_ir_value("boolean", value, kwargs) return value def as_result(self) -> Union[ConjectureResult, _Overrun]: @@ -1710,6 +1771,27 @@ def note(self, value: Any) -> None: value = repr(value) self.output += value + def record_ir_value( + self, ir_type: IRLiteralType, value: IRType, kwargs: IRKWargsType + ): + if isinstance(self.provider, PrimitiveProvider): + return + from hypothesis.internal.conjecture.engine import BUFFER_SIZE + + cd = ConjectureData( + max_length=BUFFER_SIZE, prefix=b"", random=self.__random + ) + # calling this will write to cd.buffer. 
+ draw_func = getattr(cd, f"draw_{ir_type}") + draw_func(**kwargs, forced=value, observe=False) + forced_i = int_from_bytes(cd.buffer) + size = len(cd.buffer) + # drive this through draw_bits so the appropriate examples and blocks + # are created. + # fake_forced is so we force this value to be drawn, but still create + # examples and blocks as if it wasn't forced. + self.draw_bits(8 * size, forced=forced_i, fake_forced=True) + def draw( self, strategy: "SearchStrategy[Ex]", @@ -1852,7 +1934,9 @@ def choice( i = self.draw_integer(0, len(values) - 1, forced=forced_i, observe=observe) return values[i] - def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: + def draw_bits( + self, n: int, *, forced: Optional[int] = None, fake_forced=False + ) -> int: """Return an ``n``-bit integer from the underlying source of bytes. If ``forced`` is set to an integer will instead ignore the underlying source and simulate a draw as if it had @@ -1886,7 +1970,7 @@ def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: buf = bytes(buf) result = int_from_bytes(buf) - self.__example_record.draw_bits(n, forced) + self.__example_record.draw_bits(n, forced and not fake_forced) initial = self.index @@ -1894,7 +1978,7 @@ def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: self.buffer.extend(buf) self.index = len(self.buffer) - if forced is not None: + if forced is not None and not fake_forced: self.forced_indices.update(range(initial, self.index)) self.blocks.add_endpoint(self.index) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py index a9a6e5b196..3317df3182 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py @@ -10,7 +10,7 @@ import itertools import math -from typing import TYPE_CHECKING, List, Literal, Optional, Union +from typing import List, Optional, Union import attr @@ -24,23 +24,14 @@ DataObserver, FloatKWargs, IntegerKWargs, + IRKWargsType, + IRLiteralType, + IRType, Status, StringKWargs, ) from hypothesis.internal.floats import count_between_floats, float_to_int, int_to_float -if TYPE_CHECKING: - from typing import TypeAlias -else: - TypeAlias = object - -IRType: TypeAlias = Union[int, str, bool, float, bytes] -IRKWargsType: TypeAlias = Union[ - IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs -] -# this would be "IRTypeType", but that's just confusing. -IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"] - class PreviouslyUnseenBehaviour(HypothesisException): pass diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 99a170ca64..c463da94fa 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -8,6 +8,7 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+import importlib import math import time from collections import defaultdict @@ -24,10 +25,12 @@ from hypothesis.internal.cache import LRUReusedCache from hypothesis.internal.compat import ceil, int_from_bytes from hypothesis.internal.conjecture.data import ( + AVAILABLE_PROVIDERS, ConjectureData, ConjectureResult, DataObserver, Overrun, + PrimitiveProvider, Status, ) from hypothesis.internal.conjecture.datatree import ( @@ -164,6 +167,7 @@ def __init__( self.__data_cache = LRUReusedCache(CACHE_SIZE) self.__pending_call_explanation = None + self._switch_to_primitive_provider = False def explain_next_call_as(self, explanation): self.__pending_call_explanation = explanation @@ -191,7 +195,7 @@ def should_optimise(self): return Phase.target in self.settings.phases def __tree_is_exhausted(self): - return self.tree.is_exhausted + return self.tree.is_exhausted and self.settings.backend == "hypothesis" def __stoppable_test_function(self, data): """Run ``self._test_function``, but convert a ``StopTest`` exception @@ -695,6 +699,14 @@ def generate_new_examples(self): ran_optimisations = False while self.should_generate_more(): + # There's no convenient way to track redundancy for custom backends + # yet. Will possibly improved when everything moves to the ir and we + # can use the DataTree for all backends? + if self.settings.backend != "hypothesis": + data = self.new_conjecture_data(prefix=b"", max_length=BUFFER_SIZE) + self.test_function(data) + continue + self._current_phase = "generate" prefix = self.generate_novel_prefix() assert len(prefix) <= BUFFER_SIZE @@ -805,14 +817,12 @@ def generate_mutations_from(self, data): break group = self.random.choice(groups) - ex1, ex2 = ( data.examples[i] for i in sorted(self.random.sample(group, 2)) ) assert ex1.end <= ex2.start replacements = [data.buffer[e.start : e.end] for e in [ex1, ex2]] - replacement = self.random.choice(replacements) try: @@ -822,7 +832,7 @@ def generate_mutations_from(self, data): # wrong - labels matching are only a best guess as to # whether the two are equivalent - but it doesn't # really matter. It may not achieve the desired result - # but it's still a perfectly acceptable choice sequence. + # but it's still a perfectly acceptable choice sequence # to try. new_data = self.cached_test_function( data.buffer[: ex1.start] @@ -899,8 +909,12 @@ def pareto_optimise(self): ParetoOptimiser(self).run() def _run(self): + # have to use the primitive provider to interpret database bits... + self._switch_to_primitive_provider = True with self._log_phase_statistics("reuse"): self.reuse_existing_examples() + # ...but we should use the supplied provider when generating... + self._switch_to_primitive_provider = False with self._log_phase_statistics("generate"): self.generate_new_examples() # We normally run the targeting phase mixed in with the generate phase, @@ -909,20 +923,29 @@ def _run(self): if Phase.generate not in self.settings.phases: self._current_phase = "target" self.optimise_targets() + # ...and back to the primitive provider when shrinking. 
+ self._switch_to_primitive_provider = True with self._log_phase_statistics("shrink"): self.shrink_interesting_examples() self.exit_with(ExitReason.finished) def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): + if self.settings.backend == "hypothesis" or self._switch_to_primitive_provider: + provider_cls = PrimitiveProvider + else: + mname, cname = AVAILABLE_PROVIDERS[self.settings.backend].rsplit(".", 1) + provider_cls = getattr(importlib.import_module(mname), cname) + return ConjectureData( prefix=prefix, max_length=max_length, random=self.random, observer=observer or self.tree.new_observer(), + provider=provider_cls, ) def new_conjecture_data_for_buffer(self, buffer): - return ConjectureData.for_buffer(buffer, observer=self.tree.new_observer()) + return self.new_conjecture_data(buffer, max_length=len(buffer)) def shrink_interesting_examples(self): """If we've found interesting examples, try to replace each of them diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py new file mode 100644 index 0000000000..16987ccba7 --- /dev/null +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -0,0 +1,166 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math +from contextlib import contextmanager +from random import Random +from typing import Optional, Sequence +import sys + +import pytest + +from hypothesis import given, settings, strategies as st +from hypothesis.database import InMemoryExampleDatabase +from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS, ConjectureData +from hypothesis.internal.floats import SIGNALING_NAN +from hypothesis.internal.intervalsets import IntervalSet + + +class PrngProvider: + # A test-only implementation of the PrimitiveProvider interface, which uses + # a very simple PRNG to choose each value. 
Dumb but efficient, and entirely + # independent of our real backend + + def __init__(self, conjecturedata: "ConjectureData", /) -> None: + self.prng = Random(conjecturedata.buffer or None) + + def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: + if forced is not None: + return forced + return self.prng.random() < p + + def draw_integer( + self, + min_value: Optional[int] = None, + max_value: Optional[int] = None, + *, + # weights are for choosing an element index from a bounded range + weights: Optional[Sequence[float]] = None, + shrink_towards: int = 0, + forced: Optional[int] = None, + ) -> int: + assert isinstance(shrink_towards, int) # otherwise ignored here + if forced is not None: + return forced + + if weights is not None: + assert min_value is not None + assert max_value is not None + return self.prng.choices(range(min_value, max_value + 1), weights=weights) + + if min_value is None and max_value is None: + min_value = -(2**127) + max_value = 2**127 - 1 + elif min_value is None: + min_value = max_value - 2**64 + elif max_value is None: + max_value = min_value + 2**64 + return self.prng.randint(min_value, max_value) + + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + forced: Optional[float] = None, + ) -> float: + if forced is not None: + return forced + + if allow_nan and self.prng.random() < 1 / 32: + nans = [math.nan, -math.nan, SIGNALING_NAN, -SIGNALING_NAN] + value = self.prng.choice(nans) + return value + + # small chance of inf values, if they are in bounds + if min_value <= math.inf <= max_value and self.prng.random() < 1 / 32: + return math.inf + if min_value <= -math.inf <= max_value and self.prng.random() < 1 / 32: + return -math.inf + + # get rid of infs, they cause nans if we pass them to prng.uniform + if min_value in [-math.inf, math.inf]: + min_value = math.copysign(1, min_value) * sys.float_info.max + # being too close to the bounds causes prng.uniform to only return + # inf. 
+ min_value /= 2 + if max_value in [-math.inf, math.inf]: + max_value = math.copysign(1, max_value) * sys.float_info.max + max_value /= 2 + + value = self.prng.uniform(min_value, max_value) + if value and abs(value) < smallest_nonzero_magnitude: + return math.copysign(0.0, value) + return value + + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: Optional[int] = None, + forced: Optional[str] = None, + ) -> str: + if forced is not None: + return forced + size = self.prng.randint( + min_size, max(min_size, min(100 if max_size is None else max_size, 100)) + ) + return "".join(map(chr, self.prng.choices(intervals, k=size))) + + def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes: + if forced is not None: + return forced + return self.prng.randbytes(size) + + +@contextmanager +def temp_register_backend(): + try: + AVAILABLE_PROVIDERS["prng"] = f"{__name__}.{PrngProvider.__name__}" + yield + finally: + AVAILABLE_PROVIDERS.pop("prng") + + +@pytest.mark.parametrize( + "strategy", + [ + st.booleans(), + st.integers(0, 10), + st.floats(allow_nan=False), + st.text(max_size=5), + st.binary(), + ], + ids=repr, +) +def test_find_with_backend_then_convert_to_buffer_shrink_and_replay(strategy): + db = InMemoryExampleDatabase() + assert not db.data + + with temp_register_backend(): + + @settings(database=db, backend="prng") + @given(strategy) + def test(value): + if isinstance(value, float): + # randomly generating 0 for floats is really unlikely + assert value not in [math.inf, -math.inf] + else: + assert value + + with pytest.raises(AssertionError): + test() + + assert db.data + buffers = {x for x in db.data[next(iter(db.data))] if x} + assert buffers, db.data diff --git a/hypothesis-python/tests/cover/test_settings.py b/hypothesis-python/tests/cover/test_settings.py index 2ab6838d56..b28c23dba9 100644 --- a/hypothesis-python/tests/cover/test_settings.py +++ b/hypothesis-python/tests/cover/test_settings.py @@ -456,6 +456,7 @@ def test_derandomise_with_explicit_database_is_invalid(): {"deadline": 0}, {"deadline": True}, {"deadline": False}, + {"backend": "this_backend_does_not_exist"}, ], ) def test_invalid_settings_are_errors(kwargs): diff --git a/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt index a294ba02a4..43bea92c84 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt @@ -74,6 +74,7 @@ def test_fuzz_seed(seed: typing.Hashable) -> None: suppress_health_check=st.just(not_set), deadline=st.just(not_set), print_blob=st.just(not_set), + backend=st.just("hypothesis"), ) def test_fuzz_settings( parent: typing.Optional[hypothesis.settings], @@ -87,6 +88,7 @@ def test_fuzz_settings( suppress_health_check, deadline: typing.Union[int, float, datetime.timedelta, None], print_blob: bool, + backend: str, ) -> None: hypothesis.settings( parent=parent, @@ -100,6 +102,7 @@ def test_fuzz_settings( suppress_health_check=suppress_health_check, deadline=deadline, print_blob=print_blob, + backend=backend, ) From e514d735ecc663119bc6b08bcbf1ea7a748e7d40 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 22 Feb 2024 19:11:00 -0500 Subject: [PATCH 02/43] wip --- hypothesis-python/RELEASE.rst | 4 + hypothesis-python/docs/changes.rst | 28 +++ hypothesis-python/docs/ghostwriter.rst | 59 ++++++ .../docs/schema_observations.json | 2 +- 
hypothesis-python/src/hypothesis/core.py | 34 ++- .../src/hypothesis/extra/ghostwriter.py | 150 +++++++------ .../hypothesis/internal/conjecture/data.py | 198 +++++++++++++----- .../hypothesis/internal/conjecture/engine.py | 60 ++++-- .../src/hypothesis/internal/escalation.py | 1 + .../src/hypothesis/internal/observability.py | 3 + hypothesis-python/src/hypothesis/stateful.py | 55 +++-- hypothesis-python/src/hypothesis/version.py | 2 +- .../tests/conjecture/test_alt_backend.py | 7 +- .../tests/cover/test_observability.py | 41 ++++ .../tests/cover/test_stateful.py | 18 +- .../recorded/addition_op_magic.txt | 4 +- .../recorded/addition_op_multimagic.txt | 18 +- .../recorded/division_binop_error_handler.txt | 4 +- .../recorded/division_operator.txt | 6 +- .../division_operator_with_annotations.txt | 6 +- ...sion_roundtrip_arithmeticerror_handler.txt | 4 +- .../division_roundtrip_error_handler.txt | 4 +- ...trip_error_handler_without_annotations.txt | 4 +- .../division_roundtrip_typeerror_handler.txt | 4 +- .../recorded/hypothesis_module_magic.txt | 3 +- .../ghostwriter/recorded/matmul_magic.txt | 34 +++ .../recorded/multiplication_magic.txt | 41 ++++ .../recorded/multiplication_operator.txt | 24 ++- .../multiplication_operator_unittest.txt | 24 ++- .../tests/ghostwriter/test_expected_output.py | 2 + .../tests/ghostwriter/test_ghostwriter.py | 2 +- .../tests/ghostwriter/test_ghostwriter_cli.py | 4 +- requirements/coverage.txt | 4 +- requirements/fuzzing.txt | 8 +- requirements/test.txt | 2 +- requirements/tools.txt | 14 +- tooling/src/hypothesistooling/__main__.py | 10 +- ...-05-how-not-to-die-hard-with-hypothesis.md | 7 +- 38 files changed, 652 insertions(+), 243 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst create mode 100644 hypothesis-python/tests/ghostwriter/recorded/matmul_magic.txt create mode 100644 hypothesis-python/tests/ghostwriter/recorded/multiplication_magic.txt diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..8f22e9b5d1 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,4 @@ +RELEASE_TYPE: minor + +This release adds the **experimental and unstable** :obj:`~hypothesis.settings.backend` +setting. See :ref:`alternative-backends` for details. diff --git a/hypothesis-python/docs/changes.rst b/hypothesis-python/docs/changes.rst index 21f5415630..da22ab14b8 100644 --- a/hypothesis-python/docs/changes.rst +++ b/hypothesis-python/docs/changes.rst @@ -18,6 +18,34 @@ Hypothesis 6.x .. include:: ../RELEASE.rst +.. _v6.98.9: + +------------------- +6.98.9 - 2024-02-20 +------------------- + +This patch ensures that :doc:`observability ` outputs include +an informative repr for :class:`~hypothesis.stateful.RuleBasedStateMachine` +stateful tests, along with more detailed timing information. + +.. _v6.98.8: + +------------------- +6.98.8 - 2024-02-18 +------------------- + +This patch improves :doc:`the Ghostwriter ` for binary operators. + +.. _v6.98.7: + +------------------- +6.98.7 - 2024-02-18 +------------------- + +This patch improves import-detection in :doc:`the Ghostwriter ` +(:issue:`3884`), particularly for :func:`~hypothesis.strategies.from_type` +and strategies from ``hypothesis.extra.*``. + .. _v6.98.6: ------------------- diff --git a/hypothesis-python/docs/ghostwriter.rst b/hypothesis-python/docs/ghostwriter.rst index 1b0596ab11..1ed4a12cf1 100644 --- a/hypothesis-python/docs/ghostwriter.rst +++ b/hypothesis-python/docs/ghostwriter.rst @@ -4,3 +4,62 @@ Ghostwriting tests for you .. 
automodule:: hypothesis.extra.ghostwriter :members: + +A note for test-generation researchers +-------------------------------------- + +Ghostwritten tests are intended as a *starting point for human authorship*, +to demonstrate best practice, help novices past blank-page paralysis, and save time +for experts. They *may* be ready-to-run, or include placeholders and ``# TODO:`` +comments to fill in strategies for unknown types. In either case, improving tests +for their own code gives users a well-scoped and immediately rewarding context in +which to explore property-based testing. + +By contrast, most test-generation tools aim to produce ready-to-run test suites... +and implicitly assume that the current behavior is the desired behavior. +However, the code might contain bugs, and we want our tests to fail if it does! +Worse, tools require that the code to be tested is finished and executable, +making it impossible to generate tests as part of the development process. + +`Fraser 2013`_ found that evolving a high-coverage test suite (e.g. Randoop_, EvoSuite_, Pynguin_) +"leads to clear improvements in commonly applied quality metrics such as code coverage +[but] no measurable improvement in the number of bugs actually found by developers" +and that "generating a set of test cases, even high coverage test cases, +does not necessarily improve our ability to test software". +Invariant detection (famously Daikon_; in PBT see e.g. `Alonso 2022`_, +QuickSpec_, Speculate_) relies on code execution. Program slicing (e.g. FUDGE_, +FuzzGen_, WINNIE_) requires downstream consumers of the code to test. + +Ghostwriter inspects the function name, argument names and types, and docstrings. +It can be used on buggy or incomplete code, runs in a few seconds, and produces +a single semantically-meaningful test per function or group of functions. +Rather than detecting regressions, these tests check semantic properties such as +`encode/decode or save/load round-trips `__, +for `commutative, associative, and distributive operations +`__, +`equivalence between methods `__, +`array shapes `__, +and idempotence. Where no property is detected, we simply check for +'no error on valid input' and allow the user to supply their own invariants. + +Evaluations such as the SBFT24_ competition_ measure performance on a task which +the Ghostwriter is not intended to perform. I'd love to see qualitative user +studies, such as `PBT in Practice`_ for test generation, which could check +whether the Ghostwriter is onto something or tilting at windmills. +If you're interested in similar questions, `drop me an email`_! + +.. _Daikon: https://plse.cs.washington.edu/daikon/pubs/ +.. _Alonso 2022: https://doi.org/10.1145/3540250.3559080 +.. _QuickSpec: http://www.cse.chalmers.se/~nicsma/papers/quickspec2.pdf +.. _Speculate: https://matela.com.br/paper/speculate.pdf +.. _FUDGE: https://research.google/pubs/pub48314/ +.. _FuzzGen: https://www.usenix.org/conference/usenixsecurity20/presentation/ispoglou +.. _WINNIE: https://www.ndss-symposium.org/wp-content/uploads/2021-334-paper.pdf +.. _Fraser 2013: https://doi.org/10.1145/2483760.2483774 +.. _Randoop: https://homes.cs.washington.edu/~mernst/pubs/feedback-testgen-icse2007.pdf +.. _EvoSuite: https://www.evosuite.org/wp-content/papercite-data/pdf/esecfse11.pdf +.. _Pynguin: https://arxiv.org/abs/2007.14049 +.. _SBFT24: https://arxiv.org/abs/2401.15189 +.. _competition: https://github.com/ThunderKey/python-tool-competition-2024 +.. 
_PBT in Practice: https://harrisongoldste.in/papers/icse24-pbt-in-practice.pdf +.. _drop me an email: mailto:zac@zhd.dev?subject=Hypothesis%20Ghostwriter%20research diff --git a/hypothesis-python/docs/schema_observations.json b/hypothesis-python/docs/schema_observations.json index 4047c2ccd2..357dae89a4 100644 --- a/hypothesis-python/docs/schema_observations.json +++ b/hypothesis-python/docs/schema_observations.json @@ -46,7 +46,7 @@ }, "timing": { "type": "object", - "description": "The time in seconds taken by non-overlapping parts of this test case. Hypothesis reports execute_test, and generate:{argname} for each argument.", + "description": "The time in seconds taken by non-overlapping parts of this test case. Hypothesis reports execute:test, and generate:{argname} for each argument.", "additionalProperties": { "type": "number", "minimum": 0 diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 75a100b031..1758acda41 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -844,8 +844,9 @@ def test(*args, **kwargs): in_drawtime = math.fsum(data.draw_times.values()) - arg_drawtime runtime = datetime.timedelta(seconds=finish - start - in_drawtime) self._timing_features = { - "execute_test": finish - start - in_drawtime, + "execute:test": finish - start - in_drawtime, **data.draw_times, + **data._stateful_run_times, } if (current_deadline := self.settings.deadline) is not None: @@ -937,6 +938,9 @@ def run(data): msg, format_arg = data._sampled_from_all_strategies_elements_message add_note(e, msg.format(format_arg)) raise + finally: + if parts := getattr(data, "_stateful_repr_parts", None): + self._string_repr = "\n".join(parts) # self.test_runner can include the execute_example method, or setup/teardown # _example, so it's important to get the PRNG and build context in place first. @@ -953,7 +957,11 @@ def run(data): if expected_failure is not None: exception, traceback = expected_failure if isinstance(exception, DeadlineExceeded) and ( - runtime_secs := self._timing_features.get("execute_test") + runtime_secs := math.fsum( + v + for k, v in self._timing_features.items() + if k.startswith("execute:") + ) ): report( "Unreliable test timings! On an initial run, this " @@ -1085,6 +1093,7 @@ def _execute_once_for_engine( arguments={**self._jsonable_arguments, **data._observability_args}, timing=self._timing_features, coverage=tractable_coverage_report(trace) or None, + phase=phase, ) deliver_json_blob(tc) self._timing_features = {} @@ -1166,15 +1175,18 @@ def run_engine(self): assert info._expected_exception is not None try: with with_reporter(fragments.append): - self.execute_once( - ran_example, - print_example=not self.is_find, - is_final=True, - expected_failure=( - info._expected_exception, - info._expected_traceback, - ), - ) + # TODO double check this is necessary. 
+ with hacky_patchable_run_context_yielding_per_test_case_context() as per_case_context_fn: + self.execute_once( + ran_example, + print_example=not self.is_find, + is_final=True, + expected_failure=( + info._expected_exception, + info._expected_traceback, + ), + per_case_context_fn=per_case_context_fn + ) except (UnsatisfiedAssumption, StopTest) as e: err = Flaky( "Unreliable assumption: An example which satisfied " diff --git a/hypothesis-python/src/hypothesis/extra/ghostwriter.py b/hypothesis-python/src/hypothesis/extra/ghostwriter.py index 3404088ed0..8917d5bd87 100644 --- a/hypothesis-python/src/hypothesis/extra/ghostwriter.py +++ b/hypothesis-python/src/hypothesis/extra/ghostwriter.py @@ -482,7 +482,6 @@ def _get_params(func: Callable) -> Dict[str, inspect.Parameter]: kind = inspect.Parameter.KEYWORD_ONLY continue # we omit *varargs, if there are any if _iskeyword(arg.lstrip("*")) or not arg.lstrip("*").isidentifier(): - print(repr(args)) break # skip all subsequent params if this name is invalid params.append(inspect.Parameter(name=arg, kind=kind)) @@ -588,6 +587,8 @@ def _imports_for_object(obj): """Return the imports for `obj`, which may be empty for e.g. lambdas""" if isinstance(obj, (re.Pattern, re.Match)): return {"re"} + if isinstance(obj, st.SearchStrategy): + return _imports_for_strategy(obj) try: if is_generic_type(obj): if isinstance(obj, TypeVar): @@ -606,19 +607,19 @@ def _imports_for_strategy(strategy): # If we have a lazy from_type strategy, because unwrapping it gives us an # error or invalid syntax, import that type and we're done. if isinstance(strategy, LazyStrategy): - if strategy.function.__name__ in ( - st.from_type.__name__, - st.from_regex.__name__, - ): - return { - imp - for arg in set(strategy._LazyStrategy__args) - | set(strategy._LazyStrategy__kwargs.values()) - for imp in _imports_for_object(arg) - } + imports = { + imp + for arg in set(strategy._LazyStrategy__args) + | set(strategy._LazyStrategy__kwargs.values()) + for imp in _imports_for_object(_strip_typevars(arg)) + } + if re.match(r"from_(type|regex)\(", repr(strategy)): + if repr(strategy).startswith("from_type("): + return {module for module, _ in imports} + return imports elif _get_module(strategy.function).startswith("hypothesis.extra."): module = _get_module(strategy.function).replace("._array_helpers", ".numpy") - return {(module, strategy.function.__name__)} + return {(module, strategy.function.__name__)} | imports imports = set() with warnings.catch_warnings(): @@ -672,6 +673,9 @@ def _valid_syntax_repr(strategy): if isinstance(strategy, OneOfStrategy): seen = set() elems = [] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SmallSearchSpaceWarning) + strategy.element_strategies # might warn on first access for s in strategy.element_strategies: if isinstance(s, SampledFromStrategy) and s.elements == (os.environ,): continue @@ -694,7 +698,11 @@ def _valid_syntax_repr(strategy): # Return a syntactically-valid strategy repr, including fixing some # strategy reprs and replacing invalid syntax reprs with `"nothing()"`. # String-replace to hide the special case in from_type() for Decimal('snan') - r = repr(strategy).replace(".filter(_can_hash)", "") + r = ( + repr(strategy) + .replace(".filter(_can_hash)", "") + .replace("hypothesis.strategies.", "") + ) # Replace with ... 
in confusing lambdas r = re.sub(r"(lambda.*?: )()([,)])", r"\1...\3", r) compile(r, "", "eval") @@ -724,9 +732,10 @@ def _get_module_helper(obj): dots = [i for i, c in enumerate(module_name) if c == "."] + [None] for idx in dots: - if getattr(sys.modules.get(module_name[:idx]), obj.__name__, None) is obj: - KNOWN_FUNCTION_LOCATIONS[obj] = module_name[:idx] - return module_name[:idx] + for candidate in (module_name[:idx].lstrip("_"), module_name[:idx]): + if getattr(sys.modules.get(candidate), obj.__name__, None) is obj: + KNOWN_FUNCTION_LOCATIONS[obj] = candidate + return candidate return module_name @@ -755,7 +764,7 @@ def _get_qualname(obj, *, include_module=False): def _write_call( - func: Callable, *pass_variables: str, except_: Except, assign: str = "" + func: Callable, *pass_variables: str, except_: Except = Exception, assign: str = "" ) -> str: """Write a call to `func` with explicit and implicit arguments. @@ -1000,6 +1009,9 @@ def _parameter_to_annotation(parameter: Any) -> Optional[_AnnotationData]: else: type_name = str(parameter) + if type_name.startswith("hypothesis.strategies."): + return _AnnotationData(type_name.replace("hypothesis.strategies", "st"), set()) + origin_type = get_origin(parameter) # if not generic or no generic arguments @@ -1045,9 +1057,6 @@ def _make_test(imports: ImportSet, body: str) -> str: # Discarding "builtins." and "__main__" probably isn't particularly useful # for user code, but important for making a good impression in demos. body = body.replace("builtins.", "").replace("__main__.", "") - body = body.replace("hypothesis.strategies.", "st.") - if "st.from_type(typing." in body: - imports.add("typing") imports |= {("hypothesis", "given"), ("hypothesis", "strategies as st")} if " reject()\n" in body: imports.add(("hypothesis", "reject")) @@ -1260,11 +1269,29 @@ def make_(how, *args, **kwargs): hints = get_type_hints(func) hints.pop("return", None) params = _get_params(func) - if len(hints) == len(params) == 2: - a, b = hints.values() + if (len(hints) == len(params) == 2) or ( + _get_module(func) == "operator" + and "item" not in func.__name__ + and tuple(params) in [("a", "b"), ("x", "y")] + ): + a, b = hints.values() or [Any, Any] arg1, arg2 = params if a == b and len(arg1) == len(arg2) <= 3: - make_(_make_binop_body, func, annotate=annotate) + # https://en.wikipedia.org/wiki/Distributive_property#Other_examples + known = { + "mul": "add", + "matmul": "add", + "or_": "and_", + "and_": "or_", + }.get(func.__name__, "") + distributes_over = getattr(sys.modules[_get_module(func)], known, None) + make_( + _make_binop_body, + func, + commutative=func.__name__ != "matmul", + distributes_over=distributes_over, + annotate=annotate, + ) del by_name[name] # Look for Numpy ufuncs or gufuncs, and write array-oriented tests for them. 
@@ -1469,10 +1496,17 @@ def roundtrip( return _make_test(*_make_roundtrip_body(funcs, except_, style, annotate)) -def _make_equiv_body(funcs, except_, style, annotate): +def _get_varnames(funcs): var_names = [f"result_{f.__name__}" for f in funcs] if len(set(var_names)) < len(var_names): - var_names = [f"result_{i}_{ f.__name__}" for i, f in enumerate(funcs)] + var_names = [f"result_{f.__name__}_{_get_module(f)}" for f in funcs] + if len(set(var_names)) < len(var_names): + var_names = [f"result_{i}_{f.__name__}" for i, f in enumerate(funcs)] + return var_names + + +def _make_equiv_body(funcs, except_, style, annotate): + var_names = _get_varnames(funcs) test_lines = [ _write_call(f, assign=vname, except_=except_) for vname, f in zip(var_names, funcs) @@ -1512,10 +1546,7 @@ def _make_equiv_body(funcs, except_, style, annotate): def _make_equiv_errors_body(funcs, except_, style, annotate): - var_names = [f"result_{f.__name__}" for f in funcs] - if len(set(var_names)) < len(var_names): - var_names = [f"result_{i}_{ f.__name__}" for i, f in enumerate(funcs)] - + var_names = _get_varnames(funcs) first, *rest = funcs first_call = _write_call(first, assign=var_names[0], except_=except_) extra_imports, suppress = _exception_string(except_) @@ -1715,18 +1746,11 @@ def maker( maker( "associative", "abc", + _write_call(func, "a", _write_call(func, "b", "c"), assign="left"), _write_call( func, - "a", - _write_call(func, "b", "c", except_=Exception), - except_=Exception, - assign="left", - ), - _write_call( - func, - _write_call(func, "a", "b", except_=Exception), + _write_call(func, "a", "b"), "c", - except_=Exception, assign="right", ), ) @@ -1734,8 +1758,8 @@ def maker( maker( "commutative", "ab", - _write_call(func, "a", "b", except_=Exception, assign="left"), - _write_call(func, "b", "a", except_=Exception, assign="right"), + _write_call(func, "a", "b", assign="left"), + _write_call(func, "b", "a", assign="right"), ) if identity is not None: # Guess that the identity element is the minimal example from our operands @@ -1757,34 +1781,42 @@ def maker( compile(repr(identity), "", "exec") except SyntaxError: identity = repr(identity) # type: ignore - maker( - "identity", - "a", + identity_parts = [ + f"{identity = }", _assert_eq( style, "a", - _write_call(func, "a", repr(identity), except_=Exception), + _write_call(func, "a", "identity"), ), - ) + _assert_eq( + style, + "a", + _write_call(func, "identity", "a"), + ), + ] + maker("identity", "a", "\n".join(identity_parts)) if distributes_over: - maker( - distributes_over.__name__ + "_distributes_over", - "abc", + do = distributes_over + dist_parts = [ + _write_call(func, "a", _write_call(do, "b", "c"), assign="left"), _write_call( - distributes_over, - _write_call(func, "a", "b", except_=Exception), - _write_call(func, "a", "c", except_=Exception), - except_=Exception, - assign="left", + do, + _write_call(func, "a", "b"), + _write_call(func, "a", "c"), + assign="ldist", ), + _assert_eq(style, "ldist", "left"), + "\n", + _write_call(func, _write_call(do, "a", "b"), "c", assign="right"), _write_call( - func, - "a", - _write_call(distributes_over, "b", "c", except_=Exception), - except_=Exception, - assign="right", + do, + _write_call(func, "a", "c"), + _write_call(func, "b", "c"), + assign="rdist", ), - ) + _assert_eq(style, "rdist", "right"), + ] + maker(do.__name__ + "_distributes_over", "abc", "\n".join(dist_parts)) _, operands_repr = _valid_syntax_repr(operands) operands_repr = _st_strategy_names(operands_repr) diff --git 
a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index bbfb864380..bef8b04213 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -18,6 +18,7 @@ TYPE_CHECKING, Any, Callable, + DefaultDict, Dict, FrozenSet, Iterable, @@ -641,7 +642,9 @@ def length(self) -> int: @property def trivial(self) -> bool: - return self.forced or self.all_zero + # return self.forced or self.all_zero + # TODO need a fake_forced for shrinking. + return self.all_zero class Blocks: @@ -836,34 +839,6 @@ def as_result(self) -> "_Overrun": MAX_DEPTH = 100 -class IntegerKWargs(TypedDict): - min_value: Optional[int] - max_value: Optional[int] - weights: Optional[Sequence[float]] - shrink_towards: int - - -class FloatKWargs(TypedDict): - min_value: float - max_value: float - allow_nan: bool - smallest_nonzero_magnitude: float - - -class StringKWargs(TypedDict): - intervals: IntervalSet - min_size: int - max_size: Optional[int] - - -class BytesKWargs(TypedDict): - size: int - - -class BooleanKWargs(TypedDict): - p: float - - class DataObserver: """Observer class for recording the behaviour of a ConjectureData object, primarily used for tracking @@ -909,6 +884,104 @@ def draw_boolean( pass +class IRTreeLeaf: + def __init__(self, *, ir_type: IRLiteralType, value: IRType, kwargs: IRKWargsType): + self.ir_type = ir_type + self.value = value + self.kwargs = kwargs + + def _repr_pretty_(self, p, cycle): + assert cycle is False + p.text(f"{self.ir_type} {self.value} {self.kwargs}") + + +class IRTreeNode: + def __init__( + self, + *, + label: int, + parent: Optional["IRTreeNode"], + index_in_parent: Optional[int] = None, + ): + self.parent = parent + self.index_in_parent = index_in_parent + self.label = label + self.children: List[Union[IRTreeNode, IRTreeLeaf]] = [] + + def _repr_pretty_(self, p, cycle): + assert cycle is False + p.text(str(self.label)) + with p.indent(1): + for child in self.children: + p.break_() + p.pretty(child) + + +class IRTree: + def __init__(self): + # only None when the top example hasn't been started yet + self.root: Optional[IRTreeNode] = None + self.current_node: Optional[IRTreeNode] = None + + def _repr_pretty_(self, p, cycle): + assert cycle is False + p.pretty(self.root) + + def leaves(self) -> List[IRTreeLeaf]: + def _leaves(node): + leaves = [] + for child in node.children: + if isinstance(child, IRTreeNode): + leaves += _leaves(child) + else: + assert isinstance(child, IRTreeLeaf) + leaves.append(child) + return leaves + + return _leaves(self.root) + + def start_example(self, label): + if self.root is None: + assert label == TOP_LABEL + self.root = IRTreeNode(label=TOP_LABEL, parent=None, index_in_parent=None) + self.current_node = self.root + return + + node = IRTreeNode( + label=label, + parent=self.current_node, + index_in_parent=len(self.current_node.children) + 1, + ) + self.current_node.children.append(node) + self.current_node = node + + def stop_example(self): + if self.current_node.parent is None: + assert self.current_node.label == TOP_LABEL + + self.current_node = self.current_node.parent + + def draw_value(self, ir_type: IRLiteralType, value: IRType, kwargs: IRKWargsType): + assert self.current_node is not None + leaf = IRTreeLeaf(ir_type=ir_type, value=value, kwargs=kwargs) + self.current_node.children.append(leaf) + + def draw_integer(self, value: int, kwargs: IntegerKWargs): + self.draw_value("integer", value, kwargs) 
+ + def draw_float(self, value: float, kwargs: FloatKWargs): + self.draw_value("float", value, kwargs) + + def draw_string(self, value: str, kwargs: StringKWargs): + self.draw_value("string", value, kwargs) + + def draw_bytes(self, value: bytes, kwargs: BytesKWargs): + self.draw_value("bytes", value, kwargs) + + def draw_boolean(self, value: bool, kwargs: BooleanKWargs): + self.draw_value("boolean", value, kwargs) + + @dataclass_transform() @attr.s(slots=True) class ConjectureResult: @@ -919,6 +992,7 @@ class ConjectureResult: status: Status = attr.ib() interesting_origin: Optional[InterestingOrigin] = attr.ib() buffer: bytes = attr.ib() + ir_tree: IRTree = attr.ib() blocks: Blocks = attr.ib() output: str = attr.ib() extra_information: Optional[ExtraInformation] = attr.ib() @@ -1489,6 +1563,16 @@ def for_buffer( len(buffer), buffer, random=None, observer=observer, provider=provider ) + @classmethod + def for_ir_tree( + cls, + ir_tree_prefix: IRTree, + *, + observer: Optional[DataObserver] = None, + provider: type = PrimitiveProvider, + ) -> "ConjectureData": + return cls(8 * 1024, b"", random=None, ir_tree_prefix=ir_tree_prefix, observer=observer, provider=provider) + def __init__( self, max_length: int, @@ -1497,6 +1581,7 @@ def __init__( random: Optional[Random], observer: Optional[DataObserver] = None, provider: type = PrimitiveProvider, + ir_tree_prefix: Optional[IRTree] = None, ) -> None: if observer is None: observer = DataObserver() @@ -1509,7 +1594,8 @@ def __init__( self.__prefix = bytes(prefix) self.__random = random - assert random is not None or max_length <= len(prefix) + if ir_tree_prefix is None: + assert random is not None or max_length <= len(prefix) self.blocks = Blocks(self) self.buffer: "Union[bytes, bytearray]" = bytearray() @@ -1525,6 +1611,7 @@ def __init__( self.forced_indices: "Set[int]" = set() self.interesting_origin: Optional[InterestingOrigin] = None self.draw_times: "Dict[str, float]" = {} + self._stateful_run_times: "DefaultDict[str, float]" = defaultdict(float) self.max_depth = 0 self.has_discards = False self.provider = provider(self) @@ -1561,6 +1648,11 @@ def __init__( self.extra_information = ExtraInformation() + self.ir_tree = IRTree() + self.ir_tree_leaves = None if ir_tree_prefix is None else ir_tree_prefix.leaves() + self.ir_tree_leaves_index = 0 + self.start_example(TOP_LABEL) + def __repr__(self): return "ConjectureData(%s, %d bytes%s)" % ( @@ -1618,12 +1710,22 @@ def draw_integer( "weights": weights, "shrink_towards": shrink_towards, } + if self.ir_tree_leaves is not None: + forced = self.ir_tree_leaves[self.ir_tree_leaves_index].value + self.ir_tree_leaves_index += 1 + # assert isinstance(self.ir_current_node, IRTreeNode) + # leaf = self.ir_current_node.children[self.ir_index_in_current_node] + # forced = leaf.value + # # no flakiness + # assert leaf.kwargs == kwargs + # self.ir_index_in_current_node += 1 + value = self.provider.draw_integer(**kwargs, forced=forced) if observe: self.observer.draw_integer( value, was_forced=forced is not None, kwargs=kwargs ) - self.record_ir_value("integer", value, kwargs) + self.ir_tree.draw_integer(value, kwargs) return value def draw_float( @@ -1661,7 +1763,7 @@ def draw_float( self.observer.draw_float( value, kwargs=kwargs, was_forced=forced is not None ) - self.record_ir_value("float", value, kwargs) + self.ir_tree.draw_float(value, kwargs) return value def draw_string( @@ -1685,7 +1787,7 @@ def draw_string( self.observer.draw_string( value, kwargs=kwargs, was_forced=forced is not None ) - 
self.record_ir_value("string", value, kwargs) + self.ir_tree.draw_string(value, kwargs) return value def draw_bytes( @@ -1705,7 +1807,7 @@ def draw_bytes( self.observer.draw_bytes( value, kwargs=kwargs, was_forced=forced is not None ) - self.record_ir_value("bytes", value, kwargs) + self.ir_tree.draw_bytes(value, kwargs) return value def draw_boolean( @@ -1727,7 +1829,7 @@ def draw_boolean( self.observer.draw_boolean( value, kwargs=kwargs, was_forced=forced is not None ) - self.record_ir_value("boolean", value, kwargs) + self.ir_tree.draw_boolean(value, kwargs) return value def as_result(self) -> Union[ConjectureResult, _Overrun]: @@ -1742,6 +1844,7 @@ def as_result(self) -> Union[ConjectureResult, _Overrun]: status=self.status, interesting_origin=self.interesting_origin, buffer=self.buffer, + ir_tree=self.ir_tree, examples=self.examples, blocks=self.blocks, output=self.output, @@ -1771,26 +1874,6 @@ def note(self, value: Any) -> None: value = repr(value) self.output += value - def record_ir_value( - self, ir_type: IRLiteralType, value: IRType, kwargs: IRKWargsType - ): - if isinstance(self.provider, PrimitiveProvider): - return - from hypothesis.internal.conjecture.engine import BUFFER_SIZE - - cd = ConjectureData( - max_length=BUFFER_SIZE, prefix=b"", random=self.__random - ) - # calling this will write to cd.buffer. - draw_func = getattr(cd, f"draw_{ir_type}") - draw_func(**kwargs, forced=value, observe=False) - forced_i = int_from_bytes(cd.buffer) - size = len(cd.buffer) - # drive this through draw_bits so the appropriate examples and blocks - # are created. - # fake_forced is so we force this value to be drawn, but still create - # examples and blocks as if it wasn't forced. - self.draw_bits(8 * size, forced=forced_i, fake_forced=True) def draw( self, @@ -1834,7 +1917,7 @@ def draw( try: return strategy.do_draw(self) finally: - key = observe_as or f"unlabeled_{len(self.draw_times)}" + key = observe_as or f"generate:unlabeled_{len(self.draw_times)}" self.draw_times[key] = time.perf_counter() - start_time finally: self.stop_example() @@ -1852,6 +1935,7 @@ def start_example(self, label: int) -> None: self.max_depth = self.depth self.__example_record.start_example(label) self.labels_for_structure_stack.append({label}) + self.ir_tree.start_example(label) def stop_example(self, *, discard: bool = False) -> None: if self.frozen: @@ -1897,6 +1981,8 @@ def stop_example(self, *, discard: bool = False) -> None: self.observer.kill_branch() + self.ir_tree.stop_example() + @property def examples(self) -> Examples: assert self.frozen diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index c463da94fa..6c43871c61 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -50,6 +50,13 @@ MIN_TEST_CALLS = 10 BUFFER_SIZE = 8 * 1024 +# If the shrinking phase takes more than five minutes, abort it early and print +# a warning. Many CI systems will kill a build after around ten minutes with +# no output, and appearing to hang isn't great for interactive use either - +# showing partially-shrunk examples is better than quitting with no examples! 
+# (but make it monkeypatchable, for the rare users who need to keep on shrinking) +MAX_SHRINKING_SECONDS = 300 + @attr.s class HealthCheckState: @@ -223,7 +230,7 @@ def test_function(self, data): self.debug(self.__pending_call_explanation) self.__pending_call_explanation = None - assert isinstance(data.observer, TreeRecordingObserver) + # assert isinstance(data.observer, TreeRecordingObserver) self.call_count += 1 interrupted = False @@ -298,6 +305,13 @@ def test_function(self, data): changed = True if changed: + if self.settings.backend != "hypothesis": + existing_ir_tree = data.ir_tree + data = ConjectureData.for_ir_tree(existing_ir_tree) + self.__stoppable_test_function(data) + self.__data_cache[data.buffer] = data.as_result() + print(f"ran to buffer {data.buffer}") + print(f"from ir tree {[l.value for l in existing_ir_tree.leaves()]}") self.save_buffer(data.buffer) self.interesting_examples[key] = data.as_result() self.__data_cache.pin(data.buffer) @@ -822,8 +836,8 @@ def generate_mutations_from(self, data): ) assert ex1.end <= ex2.start - replacements = [data.buffer[e.start : e.end] for e in [ex1, ex2]] - replacement = self.random.choice(replacements) + e = self.random.choice([ex1, ex2]) + replacement = data.buffer[e.start : e.end] try: # We attempt to replace both the the examples with @@ -936,11 +950,15 @@ def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): mname, cname = AVAILABLE_PROVIDERS[self.settings.backend].rsplit(".", 1) provider_cls = getattr(importlib.import_module(mname), cname) + observer = observer or self.tree.new_observer() + if self.settings.backend != "hypothesis": # replace with wants_datatree + observer = DataObserver() + return ConjectureData( prefix=prefix, max_length=max_length, random=self.random, - observer=observer or self.tree.new_observer(), + observer=observer, provider=provider_cls, ) @@ -958,12 +976,7 @@ def shrink_interesting_examples(self): return self.debug("Shrinking interesting examples") - - # If the shrinking phase takes more than five minutes, abort it early and print - # a warning. Many CI systems will kill a build after around ten minutes with - # no output, and appearing to hang isn't great for interactive use either - - # showing partially-shrunk examples is better than quitting with no examples! - self.finish_shrinking_deadline = time.perf_counter() + 300 + self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS for prev_data in sorted( self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) @@ -1088,20 +1101,21 @@ def kill_branch(self): prefix=buffer, max_length=max_length, observer=observer ) - try: - self.tree.simulate_test_function(dummy_data) - except PreviouslyUnseenBehaviour: - pass - else: - if dummy_data.status > Status.OVERRUN: - dummy_data.freeze() - try: - return self.__data_cache[dummy_data.buffer] - except KeyError: - pass + if self.settings.backend == "hypothesis": # replace with wants_datatree + try: + self.tree.simulate_test_function(dummy_data) + except PreviouslyUnseenBehaviour: + pass else: - self.__data_cache[buffer] = Overrun - return Overrun + if dummy_data.status > Status.OVERRUN: + dummy_data.freeze() + try: + return self.__data_cache[dummy_data.buffer] + except KeyError: + pass + else: + self.__data_cache[buffer] = Overrun + return Overrun # We didn't find a match in the tree, so we need to run the test # function normally. 
Note that test_function will automatically diff --git a/hypothesis-python/src/hypothesis/internal/escalation.py b/hypothesis-python/src/hypothesis/internal/escalation.py index c3c678d239..e284907e38 100644 --- a/hypothesis-python/src/hypothesis/internal/escalation.py +++ b/hypothesis-python/src/hypothesis/internal/escalation.py @@ -93,6 +93,7 @@ def get_trimmed_traceback(exception=None): or hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug or is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) and not isinstance(exception, _Trimmable) + or True ): return tb while tb.tb_next is not None and ( diff --git a/hypothesis-python/src/hypothesis/internal/observability.py b/hypothesis-python/src/hypothesis/internal/observability.py index 98753985f1..aff19d805c 100644 --- a/hypothesis-python/src/hypothesis/internal/observability.py +++ b/hypothesis-python/src/hypothesis/internal/observability.py @@ -41,9 +41,12 @@ def make_testcase( arguments: Optional[dict] = None, timing: Dict[str, float], coverage: Optional[Dict[str, List[int]]] = None, + phase: Optional[str] = None, ) -> dict: if data.interesting_origin: status_reason = str(data.interesting_origin) + elif phase == "shrink" and data.status == Status.OVERRUN: + status_reason = "exceeded size of current best example" else: status_reason = str(data.events.pop("invalid because", "")) diff --git a/hypothesis-python/src/hypothesis/stateful.py b/hypothesis-python/src/hypothesis/stateful.py index 2ae5815161..bf7271fc4c 100644 --- a/hypothesis-python/src/hypothesis/stateful.py +++ b/hypothesis-python/src/hypothesis/stateful.py @@ -20,6 +20,7 @@ from copy import copy from functools import lru_cache from io import StringIO +from time import perf_counter from typing import ( Any, Callable, @@ -48,6 +49,7 @@ from hypothesis.errors import InvalidArgument, InvalidDefinition from hypothesis.internal.conjecture import utils as cu from hypothesis.internal.healthcheck import fail_health_check +from hypothesis.internal.observability import TESTCASE_CALLBACKS from hypothesis.internal.reflection import ( function_digest, get_pretty_function_description, @@ -121,10 +123,17 @@ def run_state_machine(factory, data): print_steps = ( current_build_context().is_final or current_verbosity() >= Verbosity.debug ) - try: + cd._stateful_repr_parts = [] + + def output(s): if print_steps: - report(f"state = {machine.__class__.__name__}()") - machine.check_invariants(settings) + report(s) + if TESTCASE_CALLBACKS: + cd._stateful_repr_parts.append(s) + + try: + output(f"state = {machine.__class__.__name__}()") + machine.check_invariants(settings, output, cd._stateful_run_times) max_steps = settings.stateful_step_count steps_run = 0 @@ -141,6 +150,8 @@ def run_state_machine(factory, data): must_stop = True elif steps_run <= _min_steps: must_stop = False + + start_draw = perf_counter() if cd.draw_boolean(p=2**-16, forced=must_stop): break steps_run += 1 @@ -156,12 +167,15 @@ def run_state_machine(factory, data): machine._initialize_rules_to_run.remove(rule) else: rule, data = cd.draw(machine._rules_strategy) + draw_label = f"generate:rule:{rule.function.__name__}" + cd.draw_times.setdefault(draw_label, 0.0) + cd.draw_times[draw_label] += perf_counter() - start_draw # Pretty-print the values this rule was called with *before* calling # _add_result_to_targets, to avoid printing arguments which are also # a return value using the variable name they are assigned to. 
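The backend name passed to ``settings(backend=...)`` is resolved in ``new_conjecture_data`` earlier in this patch by looking up a dotted ``"module.ClassName"`` path in ``AVAILABLE_PROVIDERS`` and importing it. A sketch of registering an extra backend, where the package and class names are hypothetical placeholders rather than anything shipped by this series:

.. code-block:: python

    from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS

    # The key is the name users pass to settings(backend=...); the value is an
    # importable path to a PrimitiveProvider subclass, loaded via importlib.
    AVAILABLE_PROVIDERS["prng"] = "myproject.backends.PrngProvider"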
# See https://github.com/HypothesisWorks/hypothesis/issues/2341 - if print_steps: + if print_steps or TESTCASE_CALLBACKS: data_to_print = { k: machine._pretty_print(v) for k, v in data.items() } @@ -173,7 +187,12 @@ def run_state_machine(factory, data): for k, v in list(data.items()): if isinstance(v, VarReference): data[k] = machine.names_to_values[v.name] + + label = f"execute:rule:{rule.function.__name__}" + start = perf_counter() result = rule.function(machine, **data) + cd._stateful_run_times[label] += perf_counter() - start + if rule.targets: if isinstance(result, MultipleResults): for single_result in result.values: @@ -190,16 +209,15 @@ def run_state_machine(factory, data): HealthCheck.return_value, ) finally: - if print_steps: + if print_steps or TESTCASE_CALLBACKS: # 'result' is only used if the step has target bundles. # If it does, and the result is a 'MultipleResult', # then 'print_step' prints a multi-variable assignment. - machine._print_step(rule, data_to_print, result) - machine.check_invariants(settings) + output(machine._repr_step(rule, data_to_print, result)) + machine.check_invariants(settings, output, cd._stateful_run_times) cd.stop_example() finally: - if print_steps: - report("state.teardown()") + output("state.teardown()") machine.teardown() # Use a machine digest to identify stateful tests in the example database @@ -338,7 +356,7 @@ def invariants(cls): cls._invariants_per_class[cls] = target return cls._invariants_per_class[cls] - def _print_step(self, rule, data, result): + def _repr_step(self, rule, data, result): self.step_count = getattr(self, "step_count", 0) + 1 output_assignment = "" if rule.targets: @@ -350,13 +368,8 @@ def _print_step(self, rule, data, result): output_assignment = ", ".join(output_names) + " = " else: output_assignment = self._last_names(1)[0] + " = " - report( - "{}state.{}({})".format( - output_assignment, - rule.function.__name__, - ", ".join("%s=%s" % kv for kv in data.items()), - ) - ) + args = ", ".join("%s=%s" % kv for kv in data.items()) + return f"{output_assignment}state.{rule.function.__name__}({args})" def _add_result_to_targets(self, targets, result): name = self._new_name() @@ -367,18 +380,22 @@ def _add_result_to_targets(self, targets, result): for target in targets: self.bundles.setdefault(target, []).append(VarReference(name)) - def check_invariants(self, settings): + def check_invariants(self, settings, output, runtimes): for invar in self.invariants(): if self._initialize_rules_to_run and not invar.check_during_init: continue if not all(precond(self) for precond in invar.preconditions): continue + name = invar.function.__name__ if ( current_build_context().is_final or settings.verbosity >= Verbosity.debug + or TESTCASE_CALLBACKS ): - report(f"state.{invar.function.__name__}()") + output(f"state.{name}()") + start = perf_counter() result = invar.function(self) + runtimes[f"execute:invariant:{name}"] += perf_counter() - start if result is not None: fail_health_check( settings, diff --git a/hypothesis-python/src/hypothesis/version.py b/hypothesis-python/src/hypothesis/version.py index 43ec273cc1..f54f8dc42f 100644 --- a/hypothesis-python/src/hypothesis/version.py +++ b/hypothesis-python/src/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
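With the stateful changes above, observability now sees rule-based tests: the pretty-printed steps are collected into each test case's ``representation``, and per-rule and per-invariant runtimes are added to its ``timing`` dict under keys such as ``generate:rule:<name>``, ``execute:rule:<name>``, and ``execute:invariant:<name>``. A sketch of consuming those keys through the internal callback hook (assuming ``TESTCASE_CALLBACKS`` remains a plain list of callables, as its truthiness checks above suggest):

.. code-block:: python

    from hypothesis.internal.observability import TESTCASE_CALLBACKS

    def report_slow_rules(observation):
        # Each observation is a dict; stateful test cases now carry per-rule
        # and per-invariant timing entries alongside the usual draw times.
        if observation.get("type") == "test_case":
            for key, seconds in observation.get("timing", {}).items():
                if key.startswith("execute:rule:") and seconds > 0.1:
                    print(f"slow rule: {key} took {seconds:.3f}s")

    TESTCASE_CALLBACKS.append(report_slow_rules)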
-__version_info__ = (6, 98, 6) +__version_info__ = (6, 98, 9) __version__ = ".".join(map(str, __version_info__)) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 16987ccba7..8bae371725 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -9,10 +9,10 @@ # obtain one at https://mozilla.org/MPL/2.0/. import math +import sys from contextlib import contextmanager from random import Random from typing import Optional, Sequence -import sys import pytest @@ -78,8 +78,7 @@ def draw_float( if allow_nan and self.prng.random() < 1 / 32: nans = [math.nan, -math.nan, SIGNALING_NAN, -SIGNALING_NAN] - value = self.prng.choice(nans) - return value + return self.prng.choice(nans) # small chance of inf values, if they are in bounds if min_value <= math.inf <= max_value and self.prng.random() < 1 / 32: @@ -139,7 +138,7 @@ def temp_register_backend(): st.integers(0, 10), st.floats(allow_nan=False), st.text(max_size=5), - st.binary(), + st.binary(max_size=5), ], ids=repr, ) diff --git a/hypothesis-python/tests/cover/test_observability.py b/hypothesis-python/tests/cover/test_observability.py index ca082d02c7..d63ccfc9ba 100644 --- a/hypothesis-python/tests/cover/test_observability.py +++ b/hypothesis-python/tests/cover/test_observability.py @@ -24,6 +24,12 @@ ) from hypothesis.database import InMemoryExampleDatabase from hypothesis.internal.observability import TESTCASE_CALLBACKS +from hypothesis.stateful import ( + RuleBasedStateMachine, + invariant, + rule, + run_state_machine_as_test, +) @contextlib.contextmanager @@ -84,3 +90,38 @@ def f(b): gave_ups = [t for t in ls if t["type"] == "test_case" and t["status"] == "gave_up"] for gave_up in gave_ups: assert gave_up["status_reason"].startswith("failed to satisfy assume() in f") + + +@settings(max_examples=20, stateful_step_count=5) +class UltraSimpleMachine(RuleBasedStateMachine): + value = 0 + + @rule() + def inc(self): + self.value += 1 + + @rule() + def dec(self): + self.value -= 1 + + @invariant() + def limits(self): + assert abs(self.value) <= 100 + + +def test_observability_captures_stateful_reprs(): + with capture_observations() as ls: + run_state_machine_as_test(UltraSimpleMachine) + + for x in ls: + if x["type"] != "test_case" or x["status"] == "gave_up": + continue + r = x["representation"] + assert "state.limits()" in r + assert "state.inc()" in r or "state.dec()" in r # or both + + t = x["timing"] + assert "execute:invariant:limits" in t + has_inc = "generate:rule:inc" in t and "execute:rule:inc" in t + has_dec = "generate:rule:dec" in t and "execute:rule:dec" in t + assert has_inc or has_dec diff --git a/hypothesis-python/tests/cover/test_stateful.py b/hypothesis-python/tests/cover/test_stateful.py index 0917c157f1..84b9edcc59 100644 --- a/hypothesis-python/tests/cover/test_stateful.py +++ b/hypothesis-python/tests/cover/test_stateful.py @@ -16,7 +16,14 @@ from _pytest.outcomes import Failed, Skipped from pytest import raises -from hypothesis import Phase, __version__, reproduce_failure, seed, settings as Settings +from hypothesis import ( + HealthCheck, + Phase, + __version__, + reproduce_failure, + seed, + settings as Settings, +) from hypothesis.control import current_build_context from hypothesis.database import ExampleDatabase from hypothesis.errors import DidNotReproduce, Flaky, InvalidArgument, InvalidDefinition @@ -395,10 +402,11 @@ def test_settings_attribute_is_validated(): def 
test_saves_failing_example_in_database(): db = ExampleDatabase(":memory:") + ss = Settings( + database=db, max_examples=1000, suppress_health_check=list(HealthCheck) + ) with raises(AssertionError): - run_state_machine_as_test( - DepthMachine, settings=Settings(database=db, max_examples=100) - ) + run_state_machine_as_test(DepthMachine, settings=ss) assert any(list(db.data.values())) @@ -766,7 +774,7 @@ def values_agree(self, k): def test_prints_equal_values_with_correct_variable_name(): - @Settings(max_examples=100) + @Settings(max_examples=100, suppress_health_check=list(HealthCheck)) class MovesBetweenBundles(RuleBasedStateMachine): b1 = Bundle("b1") b2 = Bundle("b2") diff --git a/hypothesis-python/tests/ghostwriter/recorded/addition_op_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/addition_op_magic.txt index 23827cb909..0f164bc2f3 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/addition_op_magic.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/addition_op_magic.txt @@ -23,4 +23,6 @@ def test_commutative_binary_operation_add(a: float, b: float) -> None: @given(a=add_operands) def test_identity_binary_operation_add(a: float) -> None: - assert a == test_expected_output.add(a=a, b=0.0) + identity = 0.0 + assert a == test_expected_output.add(a=a, b=identity) + assert a == test_expected_output.add(a=identity, b=a) diff --git a/hypothesis-python/tests/ghostwriter/recorded/addition_op_multimagic.txt b/hypothesis-python/tests/ghostwriter/recorded/addition_op_multimagic.txt index 194ff573e0..fc71ea7a34 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/addition_op_multimagic.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/addition_op_multimagic.txt @@ -1,16 +1,22 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. 
-import _operator import numpy +import operator import test_expected_output from hypothesis import given, strategies as st @given(a=st.floats(), b=st.floats()) def test_equivalent_add_add_add(a: float, b: float) -> None: - result_0_add = _operator.add(a, b) - result_1_add = numpy.add(a, b) - result_2_add = test_expected_output.add(a=a, b=b) - assert result_0_add == result_1_add, (result_0_add, result_1_add) - assert result_0_add == result_2_add, (result_0_add, result_2_add) + result_add_numpy = numpy.add(a, b) + result_add_operator = operator.add(a, b) + result_add_test_expected_output = test_expected_output.add(a=a, b=b) + assert result_add_numpy == result_add_operator, ( + result_add_numpy, + result_add_operator, + ) + assert result_add_numpy == result_add_test_expected_output, ( + result_add_numpy, + result_add_test_expected_output, + ) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_binop_error_handler.txt b/hypothesis-python/tests/ghostwriter/recorded/division_binop_error_handler.txt index 4248fe5754..ff209f01e1 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_binop_error_handler.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_binop_error_handler.txt @@ -23,4 +23,6 @@ def test_commutative_binary_operation_divide(a: int, b: int) -> None: @given(a=divide_operands) def test_identity_binary_operation_divide(a: int) -> None: - assert a == test_expected_output.divide(a=a, b=1) + identity = 1 + assert a == test_expected_output.divide(a=a, b=identity) + assert a == test_expected_output.divide(a=identity, b=a) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_operator.txt b/hypothesis-python/tests/ghostwriter/recorded/division_operator.txt index 71bbf7a040..8daeac4f88 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_operator.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_operator.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. -import _operator +import operator from hypothesis import given, strategies as st # TODO: replace st.nothing() with an appropriate strategy @@ -11,4 +11,6 @@ truediv_operands = st.nothing() @given(a=truediv_operands) def test_identity_binary_operation_truediv(a): - assert a == _operator.truediv(a, "identity element here") + identity = "identity element here" + assert a == operator.truediv(a, identity) + assert a == operator.truediv(identity, a) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_operator_with_annotations.txt b/hypothesis-python/tests/ghostwriter/recorded/division_operator_with_annotations.txt index 21eb93d172..bd26cbffde 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_operator_with_annotations.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_operator_with_annotations.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. 
-import _operator +import operator from hypothesis import given, strategies as st # TODO: replace st.nothing() with an appropriate strategy @@ -11,4 +11,6 @@ truediv_operands = st.nothing() @given(a=truediv_operands) def test_identity_binary_operation_truediv(a) -> None: - assert a == _operator.truediv(a, "identity element here") + identity = "identity element here" + assert a == operator.truediv(a, identity) + assert a == operator.truediv(identity, a) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_arithmeticerror_handler.txt b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_arithmeticerror_handler.txt index 31f9f7d421..8745936fa0 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_arithmeticerror_handler.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_arithmeticerror_handler.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. -import _operator +import operator import test_expected_output from hypothesis import given, reject, strategies as st @@ -10,7 +10,7 @@ from hypothesis import given, reject, strategies as st def test_roundtrip_divide_mul(a: int, b: int) -> None: try: value0 = test_expected_output.divide(a=a, b=b) - value1 = _operator.mul(value0, b) + value1 = operator.mul(value0, b) except ArithmeticError: reject() assert a == value1, (a, value1) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler.txt b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler.txt index 7f2b360762..4aea99d133 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. -import _operator +import operator import test_expected_output from hypothesis import given, reject, strategies as st @@ -12,5 +12,5 @@ def test_roundtrip_divide_mul(a: int, b: int) -> None: value0 = test_expected_output.divide(a=a, b=b) except ZeroDivisionError: reject() - value1 = _operator.mul(value0, b) + value1 = operator.mul(value0, b) assert a == value1, (a, value1) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler_without_annotations.txt b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler_without_annotations.txt index 719d9067aa..cb0d5bc614 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler_without_annotations.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_error_handler_without_annotations.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. 
-import _operator +import operator import test_expected_output from hypothesis import given, reject, strategies as st @@ -12,5 +12,5 @@ def test_roundtrip_divide_mul(a, b): value0 = test_expected_output.divide(a=a, b=b) except ZeroDivisionError: reject() - value1 = _operator.mul(value0, b) + value1 = operator.mul(value0, b) assert a == value1, (a, value1) diff --git a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_typeerror_handler.txt b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_typeerror_handler.txt index 4fc16c7964..52e923d66c 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_typeerror_handler.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/division_roundtrip_typeerror_handler.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. -import _operator +import operator import test_expected_output from hypothesis import given, reject, strategies as st @@ -13,7 +13,7 @@ def test_roundtrip_divide_mul(a: int, b: int) -> None: value0 = test_expected_output.divide(a=a, b=b) except ZeroDivisionError: reject() - value1 = _operator.mul(value0, b) + value1 = operator.mul(value0, b) except TypeError: reject() assert a == value1, (a, value1) diff --git a/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt index 43bea92c84..2667fa5546 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt @@ -5,8 +5,7 @@ import datetime import hypothesis import random import typing -from hypothesis import given, settings, strategies as st -from typing import Hashable +from hypothesis import given, strategies as st @given(condition=st.from_type(object)) diff --git a/hypothesis-python/tests/ghostwriter/recorded/matmul_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/matmul_magic.txt new file mode 100644 index 0000000000..f73976aca5 --- /dev/null +++ b/hypothesis-python/tests/ghostwriter/recorded/matmul_magic.txt @@ -0,0 +1,34 @@ +# This test code was written by the `hypothesis.extra.ghostwriter` module +# and is provided under the Creative Commons Zero public domain dedication. 
+ +import operator +from hypothesis import given, strategies as st + +# TODO: replace st.nothing() with an appropriate strategy + +matmul_operands = st.nothing() + + +@given(a=matmul_operands, b=matmul_operands, c=matmul_operands) +def test_associative_binary_operation_matmul(a, b, c): + left = operator.matmul(a, operator.matmul(b, c)) + right = operator.matmul(operator.matmul(a, b), c) + assert left == right, (left, right) + + +@given(a=matmul_operands) +def test_identity_binary_operation_matmul(a): + identity = "identity element here" + assert a == operator.matmul(a, identity) + assert a == operator.matmul(identity, a) + + +@given(a=matmul_operands, b=matmul_operands, c=matmul_operands) +def test_add_distributes_over_binary_operation_matmul(a, b, c): + left = operator.matmul(a, operator.add(b, c)) + ldist = operator.add(operator.matmul(a, b), operator.matmul(a, c)) + assert ldist == left, (ldist, left) + + right = operator.matmul(operator.add(a, b), c) + rdist = operator.add(operator.matmul(a, c), operator.matmul(b, c)) + assert rdist == right, (rdist, right) diff --git a/hypothesis-python/tests/ghostwriter/recorded/multiplication_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/multiplication_magic.txt new file mode 100644 index 0000000000..674da337d6 --- /dev/null +++ b/hypothesis-python/tests/ghostwriter/recorded/multiplication_magic.txt @@ -0,0 +1,41 @@ +# This test code was written by the `hypothesis.extra.ghostwriter` module +# and is provided under the Creative Commons Zero public domain dedication. + +import operator +from hypothesis import given, strategies as st + +# TODO: replace st.nothing() with an appropriate strategy + +mul_operands = st.nothing() + + +@given(a=mul_operands, b=mul_operands, c=mul_operands) +def test_associative_binary_operation_mul(a, b, c): + left = operator.mul(a, operator.mul(b, c)) + right = operator.mul(operator.mul(a, b), c) + assert left == right, (left, right) + + +@given(a=mul_operands, b=mul_operands) +def test_commutative_binary_operation_mul(a, b): + left = operator.mul(a, b) + right = operator.mul(b, a) + assert left == right, (left, right) + + +@given(a=mul_operands) +def test_identity_binary_operation_mul(a): + identity = "identity element here" + assert a == operator.mul(a, identity) + assert a == operator.mul(identity, a) + + +@given(a=mul_operands, b=mul_operands, c=mul_operands) +def test_add_distributes_over_binary_operation_mul(a, b, c): + left = operator.mul(a, operator.add(b, c)) + ldist = operator.add(operator.mul(a, b), operator.mul(a, c)) + assert ldist == left, (ldist, left) + + right = operator.mul(operator.add(a, b), c) + rdist = operator.add(operator.mul(a, c), operator.mul(b, c)) + assert rdist == right, (rdist, right) diff --git a/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator.txt b/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator.txt index 4281f161e3..2ed8968b9f 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. 
-import _operator +import operator from hypothesis import given, strategies as st # TODO: replace st.nothing() with an appropriate strategy @@ -11,25 +11,31 @@ mul_operands = st.nothing() @given(a=mul_operands, b=mul_operands, c=mul_operands) def test_associative_binary_operation_mul(a, b, c): - left = _operator.mul(a, _operator.mul(b, c)) - right = _operator.mul(_operator.mul(a, b), c) + left = operator.mul(a, operator.mul(b, c)) + right = operator.mul(operator.mul(a, b), c) assert left == right, (left, right) @given(a=mul_operands, b=mul_operands) def test_commutative_binary_operation_mul(a, b): - left = _operator.mul(a, b) - right = _operator.mul(b, a) + left = operator.mul(a, b) + right = operator.mul(b, a) assert left == right, (left, right) @given(a=mul_operands) def test_identity_binary_operation_mul(a): - assert a == _operator.mul(a, 1) + identity = 1 + assert a == operator.mul(a, identity) + assert a == operator.mul(identity, a) @given(a=mul_operands, b=mul_operands, c=mul_operands) def test_add_distributes_over_binary_operation_mul(a, b, c): - left = _operator.add(_operator.mul(a, b), _operator.mul(a, c)) - right = _operator.mul(a, _operator.add(b, c)) - assert left == right, (left, right) + left = operator.mul(a, operator.add(b, c)) + ldist = operator.add(operator.mul(a, b), operator.mul(a, c)) + assert ldist == left, (ldist, left) + + right = operator.mul(operator.add(a, b), c) + rdist = operator.add(operator.mul(a, c), operator.mul(b, c)) + assert rdist == right, (rdist, right) diff --git a/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator_unittest.txt b/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator_unittest.txt index a9517c73d8..912ba51fe2 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator_unittest.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/multiplication_operator_unittest.txt @@ -1,7 +1,7 @@ # This test code was written by the `hypothesis.extra.ghostwriter` module # and is provided under the Creative Commons Zero public domain dedication. 
-import _operator +import operator import unittest from hypothesis import given, strategies as st @@ -13,22 +13,28 @@ class TestBinaryOperationmul(unittest.TestCase): @given(a=mul_operands, b=mul_operands, c=mul_operands) def test_associative_binary_operation_mul(self, a, b, c): - left = _operator.mul(a, _operator.mul(b, c)) - right = _operator.mul(_operator.mul(a, b), c) + left = operator.mul(a, operator.mul(b, c)) + right = operator.mul(operator.mul(a, b), c) self.assertEqual(left, right) @given(a=mul_operands, b=mul_operands) def test_commutative_binary_operation_mul(self, a, b): - left = _operator.mul(a, b) - right = _operator.mul(b, a) + left = operator.mul(a, b) + right = operator.mul(b, a) self.assertEqual(left, right) @given(a=mul_operands) def test_identity_binary_operation_mul(self, a): - self.assertEqual(a, _operator.mul(a, 1)) + identity = 1 + self.assertEqual(a, operator.mul(a, identity)) + self.assertEqual(a, operator.mul(identity, a)) @given(a=mul_operands, b=mul_operands, c=mul_operands) def test_add_distributes_over_binary_operation_mul(self, a, b, c): - left = _operator.add(_operator.mul(a, b), _operator.mul(a, c)) - right = _operator.mul(a, _operator.add(b, c)) - self.assertEqual(left, right) + left = operator.mul(a, operator.add(b, c)) + ldist = operator.add(operator.mul(a, b), operator.mul(a, c)) + self.assertEqual(ldist, left) + + right = operator.mul(operator.add(a, b), c) + rdist = operator.add(operator.mul(a, c), operator.mul(b, c)) + self.assertEqual(rdist, right) diff --git a/hypothesis-python/tests/ghostwriter/test_expected_output.py b/hypothesis-python/tests/ghostwriter/test_expected_output.py index 87d959da85..e5431f9642 100644 --- a/hypothesis-python/tests/ghostwriter/test_expected_output.py +++ b/hypothesis-python/tests/ghostwriter/test_expected_output.py @@ -186,6 +186,8 @@ def sequence_from_collections(items: CollectionsSequence[int]) -> int: ghostwriter.equivalent(sorted, sorted, sorted, annotate=True), ), ("addition_op_magic", ghostwriter.magic(add)), + ("multiplication_magic", ghostwriter.magic(operator.mul)), + ("matmul_magic", ghostwriter.magic(operator.matmul)), ("addition_op_multimagic", ghostwriter.magic(add, operator.add, numpy.add)), ("division_fuzz_error_handler", ghostwriter.fuzz(divide)), ( diff --git a/hypothesis-python/tests/ghostwriter/test_ghostwriter.py b/hypothesis-python/tests/ghostwriter/test_ghostwriter.py index 5ac7533dd6..f63f881bf3 100644 --- a/hypothesis-python/tests/ghostwriter/test_ghostwriter.py +++ b/hypothesis-python/tests/ghostwriter/test_ghostwriter.py @@ -404,7 +404,7 @@ def test_unrepr_identity_elem(): # we can walk the strategy and collect all the objects to import. 
[ # Lazy from_type() is handled without being unwrapped - (LazyStrategy(from_type, (enum.Enum,), {}), {("enum", "Enum")}), + (LazyStrategy(from_type, (enum.Enum,), {}), {"enum"}), # Mapped, filtered, and flatmapped check both sides of the method ( builds(enum.Enum).map(Decimal), diff --git a/hypothesis-python/tests/ghostwriter/test_ghostwriter_cli.py b/hypothesis-python/tests/ghostwriter/test_ghostwriter_cli.py index dcf541e9c1..8401c728ac 100644 --- a/hypothesis-python/tests/ghostwriter/test_ghostwriter_cli.py +++ b/hypothesis-python/tests/ghostwriter/test_ghostwriter_cli.py @@ -28,6 +28,7 @@ magic, roundtrip, ) +from hypothesis.internal.reflection import get_pretty_function_description def run(cmd, *, cwd=None): @@ -65,12 +66,13 @@ def run(cmd, *, cwd=None): ("sorted --annotate", lambda: fuzz(sorted, annotate=True)), ("sorted --no-annotate", lambda: fuzz(sorted, annotate=False)), ], + ids=get_pretty_function_description, ) def test_cli_python_equivalence(cli, code): result = run("hypothesis write " + cli) result.check_returncode() cli_output = result.stdout.strip() - assert not result.stderr + assert cli == "hypothesis.strategies" or not result.stderr code_output = code().strip() assert code_output == cli_output diff --git a/requirements/coverage.txt b/requirements/coverage.txt index 0561858931..18c7c29351 100644 --- a/requirements/coverage.txt +++ b/requirements/coverage.txt @@ -26,7 +26,7 @@ exceptiongroup==1.2.0 ; python_version < "3.11" # pytest execnet==2.0.2 # via pytest-xdist -fakeredis==2.21.0 +fakeredis==2.21.1 # via -r requirements/coverage.in iniconfig==2.0.0 # via pytest @@ -61,7 +61,7 @@ ptyprocess==0.7.0 # via pexpect pyarrow==15.0.0 # via -r requirements/coverage.in -pytest==8.0.0 +pytest==8.0.1 # via # -r requirements/test.in # pytest-xdist diff --git a/requirements/fuzzing.txt b/requirements/fuzzing.txt index 082f72ec9e..466b1ae692 100644 --- a/requirements/fuzzing.txt +++ b/requirements/fuzzing.txt @@ -50,13 +50,13 @@ exceptiongroup==1.2.0 ; python_version < "3.11" # pytest execnet==2.0.2 # via pytest-xdist -fakeredis==2.21.0 +fakeredis==2.21.1 # via -r requirements/coverage.in flask==3.0.2 # via dash hypofuzz==24.2.2 # via -r requirements/fuzzing.in -hypothesis[cli]==6.98.4 +hypothesis[cli]==6.98.6 # via # hypofuzz # hypothesis @@ -110,7 +110,7 @@ pexpect==4.9.0 # via -r requirements/test.in platformdirs==4.2.0 # via black -plotly==5.18.0 +plotly==5.19.0 # via dash pluggy==1.4.0 # via pytest @@ -122,7 +122,7 @@ pyarrow==15.0.0 # via -r requirements/coverage.in pygments==2.17.2 # via rich -pytest==8.0.0 +pytest==8.0.1 # via # -r requirements/test.in # hypofuzz diff --git a/requirements/test.txt b/requirements/test.txt index f78a472249..0a70d91df5 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -22,7 +22,7 @@ pluggy==1.4.0 # via pytest ptyprocess==0.7.0 # via pexpect -pytest==8.0.0 +pytest==8.0.1 # via # -r requirements/test.in # pytest-xdist diff --git a/requirements/tools.txt b/requirements/tools.txt index a1efc1786a..dfae6b4903 100644 --- a/requirements/tools.txt +++ b/requirements/tools.txt @@ -48,7 +48,7 @@ com2ann==0.3.0 # via shed coverage==7.4.1 # via -r requirements/tools.in -cryptography==42.0.2 +cryptography==42.0.3 # via # secretstorage # types-pyopenssl @@ -162,7 +162,7 @@ pelican[markdown]==4.9.1 # via -r requirements/tools.in pexpect==4.9.0 # via ipython -pip-tools==7.3.0 +pip-tools==7.4.0 # via -r requirements/tools.in pkginfo==1.9.6 # via twine @@ -195,10 +195,12 @@ pygments==2.17.2 pyproject-api==1.6.1 # via tox pyproject-hooks==1.0.0 
- # via build + # via + # build + # pip-tools pyright==1.1.350 # via -r requirements/tools.in -pytest==8.0.0 +pytest==8.0.1 # via -r requirements/tools.in python-dateutil==2.8.2 # via @@ -231,7 +233,7 @@ rich==13.7.0 # via # pelican # twine -ruff==0.2.1 +ruff==0.2.2 # via -r requirements/tools.in secretstorage==3.3.3 # via keyring @@ -299,7 +301,7 @@ tomli==2.0.1 # pyproject-hooks # pytest # tox -tox==4.12.1 +tox==4.13.0 # via -r requirements/tools.in traitlets==5.14.1 # via diff --git a/tooling/src/hypothesistooling/__main__.py b/tooling/src/hypothesistooling/__main__.py index 4aafea80da..ae38bc2b62 100644 --- a/tooling/src/hypothesistooling/__main__.py +++ b/tooling/src/hypothesistooling/__main__.py @@ -431,12 +431,12 @@ def run_tox(task, version, *args): "3.8": "3.8.18", "3.9": "3.9.18", "3.10": "3.10.13", - "3.11": "3.11.6", - "3.12": "3.12.0", - "3.13": "3.13.0a2", + "3.11": "3.11.8", + "3.12": "3.12.2", + "3.13": "3.13.0a3", "pypy3.8": "pypy3.8-7.3.11", - "pypy3.9": "pypy3.9-7.3.13", - "pypy3.10": "pypy3.10-7.3.13", + "pypy3.9": "pypy3.9-7.3.15", + "pypy3.10": "pypy3.10-7.3.15", } ci_version = "3.10" # Keep this in sync with GH Actions main.yml and .readthedocs.yml diff --git a/website/content/2017-04-05-how-not-to-die-hard-with-hypothesis.md b/website/content/2017-04-05-how-not-to-die-hard-with-hypothesis.md index 57e919ef04..d410879c1a 100644 --- a/website/content/2017-04-05-how-not-to-die-hard-with-hypothesis.md +++ b/website/content/2017-04-05-how-not-to-die-hard-with-hypothesis.md @@ -69,6 +69,8 @@ from hypothesis import note, settings from hypothesis.stateful import RuleBasedStateMachine, invariant, rule +# The default is not always enough for Hypothesis to find a failing example. +@settings(max_examples=2000) class DieHardProblem(RuleBasedStateMachine): small = 0 big = 0 @@ -112,10 +114,7 @@ class DieHardProblem(RuleBasedStateMachine): assert self.big != 4 -# The default of 200 is sometimes not enough for Hypothesis to find -# a falsifying example. 
-with settings(max_examples=2000): - DieHardTest = DieHardProblem.TestCase +DieHardTest = DieHardProblem.TestCase ``` Calling `pytest` on this file quickly digs up a solution: From 751498d5f8e625d07fce69f5bb808750387ca7a2 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 22 Feb 2024 20:41:10 -0500 Subject: [PATCH 03/43] bad debugging --- .../src/hypothesis/internal/conjecture/engine.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 6c43871c61..7fe49d1e90 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -306,12 +306,10 @@ def test_function(self, data): if changed: if self.settings.backend != "hypothesis": - existing_ir_tree = data.ir_tree - data = ConjectureData.for_ir_tree(existing_ir_tree) + data = ConjectureData.for_ir_tree(data.ir_tree) self.__stoppable_test_function(data) self.__data_cache[data.buffer] = data.as_result() print(f"ran to buffer {data.buffer}") - print(f"from ir tree {[l.value for l in existing_ir_tree.leaves()]}") self.save_buffer(data.buffer) self.interesting_examples[key] = data.as_result() self.__data_cache.pin(data.buffer) From 0a9881a402887e7e08e57a9e1085ec1d301b9f84 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 13:19:11 -0500 Subject: [PATCH 04/43] update for crosshair --- .../src/hypothesis/internal/conjecture/data.py | 6 ++++-- .../src/hypothesis/internal/conjecture/engine.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index bef8b04213..64eccc3f26 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1710,8 +1710,10 @@ def draw_integer( "weights": weights, "shrink_towards": shrink_towards, } - if self.ir_tree_leaves is not None: - forced = self.ir_tree_leaves[self.ir_tree_leaves_index].value + if self.ir_tree_leaves is not None and observe: + leaf = self.ir_tree_leaves[self.ir_tree_leaves_index] + forced = leaf.value + assert kwargs == leaf.kwargs self.ir_tree_leaves_index += 1 # assert isinstance(self.ir_current_node, IRTreeNode) # leaf = self.ir_current_node.children[self.ir_index_in_current_node] diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 7fe49d1e90..2059d22f25 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -306,6 +306,8 @@ def test_function(self, data): if changed: if self.settings.backend != "hypothesis": + for node in data.ir_tree.leaves(): + node.value = data.provider.export_value(node.value) data = ConjectureData.for_ir_tree(data.ir_tree) self.__stoppable_test_function(data) self.__data_cache[data.buffer] = data.as_result() From 87aab305c633db5086e343c4187323792fb0e607 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 16:44:49 -0500 Subject: [PATCH 05/43] formatting and remove some debug --- hypothesis-python/src/hypothesis/core.py | 2 +- .../hypothesis/internal/conjecture/data.py | 21 ++++++++++--------- .../src/hypothesis/internal/escalation.py | 1 - 3 files changed, 12 insertions(+), 12 deletions(-) diff --git 
a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 1758acda41..8b78586410 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -1185,7 +1185,7 @@ def run_engine(self): info._expected_exception, info._expected_traceback, ), - per_case_context_fn=per_case_context_fn + per_case_context_fn=per_case_context_fn, ) except (UnsatisfiedAssumption, StopTest) as e: err = Flaky( diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index e9aec4660c..b3f71febde 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1570,7 +1570,14 @@ def for_ir_tree( observer: Optional[DataObserver] = None, provider: type = PrimitiveProvider, ) -> "ConjectureData": - return cls(8 * 1024, b"", random=None, ir_tree_prefix=ir_tree_prefix, observer=observer, provider=provider) + return cls( + 8 * 1024, + b"", + random=None, + ir_tree_prefix=ir_tree_prefix, + observer=observer, + provider=provider, + ) def __init__( self, @@ -1648,11 +1655,12 @@ def __init__( self.extra_information = ExtraInformation() self.ir_tree = IRTree() - self.ir_tree_leaves = None if ir_tree_prefix is None else ir_tree_prefix.leaves() + self.ir_tree_leaves = ( + None if ir_tree_prefix is None else ir_tree_prefix.leaves() + ) self.ir_tree_leaves_index = 0 self.start_example(TOP_LABEL) - def __repr__(self): return "ConjectureData(%s, %d bytes%s)" % ( self.status.name, @@ -1714,12 +1722,6 @@ def draw_integer( forced = leaf.value assert kwargs == leaf.kwargs self.ir_tree_leaves_index += 1 - # assert isinstance(self.ir_current_node, IRTreeNode) - # leaf = self.ir_current_node.children[self.ir_index_in_current_node] - # forced = leaf.value - # # no flakiness - # assert leaf.kwargs == kwargs - # self.ir_index_in_current_node += 1 value = self.provider.draw_integer(**kwargs, forced=forced) if observe: @@ -1875,7 +1877,6 @@ def note(self, value: Any) -> None: value = repr(value) self.output += value - def draw( self, strategy: "SearchStrategy[Ex]", diff --git a/hypothesis-python/src/hypothesis/internal/escalation.py b/hypothesis-python/src/hypothesis/internal/escalation.py index e284907e38..c3c678d239 100644 --- a/hypothesis-python/src/hypothesis/internal/escalation.py +++ b/hypothesis-python/src/hypothesis/internal/escalation.py @@ -93,7 +93,6 @@ def get_trimmed_traceback(exception=None): or hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug or is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) and not isinstance(exception, _Trimmable) - or True ): return tb while tb.tb_next is not None and ( From f22c875701f3a0060e0b3daf9be7b5642f26e176 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 16:45:51 -0500 Subject: [PATCH 06/43] update IRTypeName --- hypothesis-python/src/hypothesis/internal/conjecture/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index b3f71febde..4479e8f908 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -884,7 +884,7 @@ def draw_boolean( class IRTreeLeaf: - def __init__(self, *, ir_type: IRLiteralType, value: IRType, kwargs: IRKWargsType): + def __init__(self, *, ir_type: IRTypeName, value: IRType, 
kwargs: IRKWargsType): self.ir_type = ir_type self.value = value self.kwargs = kwargs @@ -960,7 +960,7 @@ def stop_example(self): self.current_node = self.current_node.parent - def draw_value(self, ir_type: IRLiteralType, value: IRType, kwargs: IRKWargsType): + def draw_value(self, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType): assert self.current_node is not None leaf = IRTreeLeaf(ir_type=ir_type, value=value, kwargs=kwargs) self.current_node.children.append(leaf) From f6863f70240ac2a37f6f4b5593dd51a559ab0e1a Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 16:47:44 -0500 Subject: [PATCH 07/43] fix PrngProvider weights --- hypothesis-python/tests/conjecture/test_alt_backend.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 8bae371725..1ee32d84b8 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -53,7 +53,9 @@ def draw_integer( if weights is not None: assert min_value is not None assert max_value is not None - return self.prng.choices(range(min_value, max_value + 1), weights=weights) + # use .choices so we can use the weights= param. + choices = self.prng.choices(range(min_value, max_value + 1), weights=weights, k=1) + return choices[0] if min_value is None and max_value is None: min_value = -(2**127) From 074a6969bd6b34f7af70959a207d4d0a3615171d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 17:02:23 -0500 Subject: [PATCH 08/43] add post_test_case_hook --- .../src/hypothesis/internal/conjecture/data.py | 7 +++++++ .../src/hypothesis/internal/conjecture/engine.py | 3 +-- hypothesis-python/tests/conjecture/test_alt_backend.py | 8 ++++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 4479e8f908..7b5ad965f8 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1030,6 +1030,13 @@ class PrimitiveProvider: def __init__(self, conjecturedata: "ConjectureData", /) -> None: self._cd = conjecturedata + def post_test_case_hook(self, value): + # hook for providers to modify values returned by draw_* after a full + # test case concludes. Originally exposed for crosshair to reify its + # symbolic values into actual values. + # I'm not tied to this exact function name or design. + return value + def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: """Return True with probability p (assuming a uniform generator), shrinking towards False. 
If ``forced`` is set to a non-None value, this diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 2059d22f25..6ae2155cd4 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -307,11 +307,10 @@ def test_function(self, data): if changed: if self.settings.backend != "hypothesis": for node in data.ir_tree.leaves(): - node.value = data.provider.export_value(node.value) + node.value = data.provider.post_test_case_hook(node.value) data = ConjectureData.for_ir_tree(data.ir_tree) self.__stoppable_test_function(data) self.__data_cache[data.buffer] = data.as_result() - print(f"ran to buffer {data.buffer}") self.save_buffer(data.buffer) self.interesting_examples[key] = data.as_result() self.__data_cache.pin(data.buffer) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 1ee32d84b8..2bbff73e89 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -18,12 +18,16 @@ from hypothesis import given, settings, strategies as st from hypothesis.database import InMemoryExampleDatabase -from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS, ConjectureData +from hypothesis.internal.conjecture.data import ( + AVAILABLE_PROVIDERS, + ConjectureData, + PrimitiveProvider, +) from hypothesis.internal.floats import SIGNALING_NAN from hypothesis.internal.intervalsets import IntervalSet -class PrngProvider: +class PrngProvider(PrimitiveProvider): # A test-only implementation of the PrimitiveProvider interface, which uses # a very simple PRNG to choose each value. 
Dumb but efficient, and entirely # independent of our real backend From 8a9f916bc2b4895ed9a5b0a765652f9f71b25cc9 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 17:02:38 -0500 Subject: [PATCH 09/43] format --- .../src/hypothesis/internal/conjecture/engine.py | 10 +++------- hypothesis-python/tests/conjecture/test_alt_backend.py | 4 +++- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 6ae2155cd4..4149c76b5d 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -33,11 +33,7 @@ PrimitiveProvider, Status, ) -from hypothesis.internal.conjecture.datatree import ( - DataTree, - PreviouslyUnseenBehaviour, - TreeRecordingObserver, -) +from hypothesis.internal.conjecture.datatree import DataTree, PreviouslyUnseenBehaviour from hypothesis.internal.conjecture.junkdrawer import clamp, ensure_free_stackframes from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key @@ -950,7 +946,7 @@ def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): provider_cls = getattr(importlib.import_module(mname), cname) observer = observer or self.tree.new_observer() - if self.settings.backend != "hypothesis": # replace with wants_datatree + if self.settings.backend != "hypothesis": # replace with wants_datatree observer = DataObserver() return ConjectureData( @@ -1100,7 +1096,7 @@ def kill_branch(self): prefix=buffer, max_length=max_length, observer=observer ) - if self.settings.backend == "hypothesis": # replace with wants_datatree + if self.settings.backend == "hypothesis": # replace with wants_datatree try: self.tree.simulate_test_function(dummy_data) except PreviouslyUnseenBehaviour: diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 2bbff73e89..7b911ae11f 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -58,7 +58,9 @@ def draw_integer( assert min_value is not None assert max_value is not None # use .choices so we can use the weights= param. 
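``post_test_case_hook`` above is the point where a backend whose ``draw_*`` methods hand back symbolic or lazy values can turn them into ordinary Python values once a test case has finished; the engine calls it on every recorded leaf before replaying. A minimal sketch of a conforming provider, with the reification left as a comment because it depends entirely on the backend:

.. code-block:: python

    from hypothesis.internal.conjecture.data import PrimitiveProvider

    class SketchProvider(PrimitiveProvider):
        # Inherits the default draw_* implementations; a real backend would
        # override them to produce its own (possibly symbolic) values.
        def post_test_case_hook(self, value):
            # A solver-style backend would return the concrete value its
            # engine chose; for plain Python values the identity is enough.
            return value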
- choices = self.prng.choices(range(min_value, max_value + 1), weights=weights, k=1) + choices = self.prng.choices( + range(min_value, max_value + 1), weights=weights, k=1 + ) return choices[0] if min_value is None and max_value is None: From 71e53a3cc8d3952659a216cdc1facc370365218d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 17:02:44 -0500 Subject: [PATCH 10/43] abstract to _pop_ir_tree_value --- .../hypothesis/internal/conjecture/data.py | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 7b5ad965f8..bbdcc79b5d 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1725,10 +1725,7 @@ def draw_integer( "shrink_towards": shrink_towards, } if self.ir_tree_leaves is not None and observe: - leaf = self.ir_tree_leaves[self.ir_tree_leaves_index] - forced = leaf.value - assert kwargs == leaf.kwargs - self.ir_tree_leaves_index += 1 + forced = self._pop_ir_tree_value("integer", kwargs) value = self.provider.draw_integer(**kwargs, forced=forced) if observe: @@ -1768,6 +1765,9 @@ def draw_float( "allow_nan": allow_nan, "smallest_nonzero_magnitude": smallest_nonzero_magnitude, } + if self.ir_tree_leaves is not None and observe: + forced = self._pop_ir_tree_value("float", kwargs) + value = self.provider.draw_float(**kwargs, forced=forced) if observe: self.observer.draw_float( @@ -1792,6 +1792,9 @@ def draw_string( "min_size": min_size, "max_size": max_size, } + if self.ir_tree_leaves is not None and observe: + forced = self._pop_ir_tree_value("string", kwargs) + value = self.provider.draw_string(**kwargs, forced=forced) if observe: self.observer.draw_string( @@ -1812,6 +1815,10 @@ def draw_bytes( assert size >= 0 kwargs: BytesKWargs = {"size": size} + + if self.ir_tree_leaves is not None and observe: + forced = self._pop_ir_tree_value("bytes", kwargs) + value = self.provider.draw_bytes(**kwargs, forced=forced) if observe: self.observer.draw_bytes( @@ -1834,6 +1841,10 @@ def draw_boolean( assert p < (1 - 2 ** (-64)) kwargs: BooleanKWargs = {"p": p} + + if self.ir_tree_leaves is not None and observe: + forced = self._pop_ir_tree_value("boolean", kwargs) + value = self.provider.draw_boolean(**kwargs, forced=forced) if observe: self.observer.draw_boolean( @@ -1842,6 +1853,14 @@ def draw_boolean( self.ir_tree.draw_boolean(value, kwargs) return value + def _pop_ir_tree_value(self, ir_type: IRTypeName, kwargs: IRKWargsType): + leaf = self.ir_tree_leaves[self.ir_tree_leaves_index] + assert leaf.ir_type == ir_type + assert kwargs == leaf.kwargs + + self.ir_tree_leaves_index += 1 + return leaf.value + def as_result(self) -> Union[ConjectureResult, _Overrun]: """Convert the result of running this test into either an Overrun object or a ConjectureResult.""" From 66d39993f20681816f20c6c2e8e118d5a741cf9a Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 17:05:59 -0500 Subject: [PATCH 11/43] typing --- .../src/hypothesis/internal/conjecture/data.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index bbdcc79b5d..b88479168f 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -960,24 
+960,24 @@ def stop_example(self): self.current_node = self.current_node.parent - def draw_value(self, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType): + def draw_value(self, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType) -> None: assert self.current_node is not None leaf = IRTreeLeaf(ir_type=ir_type, value=value, kwargs=kwargs) self.current_node.children.append(leaf) - def draw_integer(self, value: int, kwargs: IntegerKWargs): + def draw_integer(self, value: int, kwargs: IntegerKWargs) -> None: self.draw_value("integer", value, kwargs) - def draw_float(self, value: float, kwargs: FloatKWargs): + def draw_float(self, value: float, kwargs: FloatKWargs) -> None: self.draw_value("float", value, kwargs) - def draw_string(self, value: str, kwargs: StringKWargs): + def draw_string(self, value: str, kwargs: StringKWargs) -> None: self.draw_value("string", value, kwargs) - def draw_bytes(self, value: bytes, kwargs: BytesKWargs): + def draw_bytes(self, value: bytes, kwargs: BytesKWargs) -> None: self.draw_value("bytes", value, kwargs) - def draw_boolean(self, value: bool, kwargs: BooleanKWargs): + def draw_boolean(self, value: bool, kwargs: BooleanKWargs) -> None: self.draw_value("boolean", value, kwargs) @@ -1853,7 +1853,8 @@ def draw_boolean( self.ir_tree.draw_boolean(value, kwargs) return value - def _pop_ir_tree_value(self, ir_type: IRTypeName, kwargs: IRKWargsType): + def _pop_ir_tree_value(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRType: + assert self.ir_tree_leaves is not None leaf = self.ir_tree_leaves[self.ir_tree_leaves_index] assert leaf.ir_type == ir_type assert kwargs == leaf.kwargs @@ -2052,7 +2053,7 @@ def choice( return values[i] def draw_bits( - self, n: int, *, forced: Optional[int] = None, fake_forced=False + self, n: int, *, forced: Optional[int] = None, fake_forced: bool = False ) -> int: """Return an ``n``-bit integer from the underlying source of bytes. If ``forced`` is set to an integer will instead From e49a0eb29a175290f61443b102e33b9e04b1319d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 18:38:43 -0500 Subject: [PATCH 12/43] better backend database/shrinking --- .../hypothesis/internal/conjecture/data.py | 12 +++------ .../hypothesis/internal/conjecture/engine.py | 27 ++++++++++++++----- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index b88479168f..ddd89b9a30 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -641,9 +641,7 @@ def length(self) -> int: @property def trivial(self) -> bool: - # return self.forced or self.all_zero - # TODO need a fake_forced for shrinking. - return self.all_zero + return self.forced or self.all_zero class Blocks: @@ -2052,9 +2050,7 @@ def choice( i = self.draw_integer(0, len(values) - 1, forced=forced_i, observe=observe) return values[i] - def draw_bits( - self, n: int, *, forced: Optional[int] = None, fake_forced: bool = False - ) -> int: + def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: """Return an ``n``-bit integer from the underlying source of bytes. 
If ``forced`` is set to an integer will instead ignore the underlying source and simulate a draw as if it had @@ -2088,7 +2084,7 @@ def draw_bits( buf = bytes(buf) result = int_from_bytes(buf) - self.__example_record.draw_bits(n, forced and not fake_forced) + self.__example_record.draw_bits(n, forced) initial = self.index @@ -2096,7 +2092,7 @@ def draw_bits( self.buffer.extend(buf) self.index = len(self.buffer) - if forced is not None and not fake_forced: + if forced is not None: self.forced_indices.update(range(initial, self.index)) self.blocks.add_endpoint(self.index) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 4149c76b5d..1b8b8ebd86 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -284,6 +284,26 @@ def test_function(self, data): self.valid_examples += 1 if data.status == Status.INTERESTING: + if self.settings.backend != "hypothesis": + for node in data.ir_tree.leaves(): + node.value = data.provider.post_test_case_hook(node.value) + + # drive the ir tree through the test function to convert it + # to a buffer + data = ConjectureData.for_ir_tree(data.ir_tree) + self.__stoppable_test_function(data) + + # ir tree conversion works by using forced=. This works great, + # but has the side effect of causing *all* blocks to be marked + # as forced. The shrinker in turn thinks these blocks are + # trivial and avoids shrinking them. + # We'll drive the buffer through the test function one more + # time to set up the blocks correctly for the shrinker. + data = ConjectureData.for_buffer(data.buffer) + self.__stoppable_test_function(data) + + self.__data_cache[data.buffer] = data.as_result() + key = data.interesting_origin changed = False try: @@ -301,12 +321,6 @@ def test_function(self, data): changed = True if changed: - if self.settings.backend != "hypothesis": - for node in data.ir_tree.leaves(): - node.value = data.provider.post_test_case_hook(node.value) - data = ConjectureData.for_ir_tree(data.ir_tree) - self.__stoppable_test_function(data) - self.__data_cache[data.buffer] = data.as_result() self.save_buffer(data.buffer) self.interesting_examples[key] = data.as_result() self.__data_cache.pin(data.buffer) @@ -826,6 +840,7 @@ def generate_mutations_from(self, data): break group = self.random.choice(groups) + ex1, ex2 = ( data.examples[i] for i in sorted(self.random.sample(group, 2)) ) From 76a9b57be3fb8e8174854e3225506ca674b9c779 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 24 Feb 2024 18:40:09 -0500 Subject: [PATCH 13/43] format --- hypothesis-python/src/hypothesis/internal/conjecture/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index ddd89b9a30..6989727683 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -958,7 +958,9 @@ def stop_example(self): self.current_node = self.current_node.parent - def draw_value(self, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType) -> None: + def draw_value( + self, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType + ) -> None: assert self.current_node is not None leaf = IRTreeLeaf(ir_type=ir_type, value=value, kwargs=kwargs) self.current_node.children.append(leaf) From 
cfa41333c69f0b67f2d89029856e59f32dcc0cd7 Mon Sep 17 00:00:00 2001
From: Liam DeVoe
Date: Wed, 28 Feb 2024 22:45:09 -0500
Subject: [PATCH 14/43] default result to None

---
 hypothesis-python/src/hypothesis/core.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
index 8b78586410..6bb7342367 100644
--- a/hypothesis-python/src/hypothesis/core.py
+++ b/hypothesis-python/src/hypothesis/core.py
@@ -947,6 +947,9 @@ def run(data):
         with local_settings(self.settings):
             with deterministic_PRNG():
                 with BuildContext(data, is_final=is_final) as context:
+                    # providers may throw in per_case_context_fn, and we'd like
+                    # `result` to still be set in these cases.
+                    result = None
                     with per_case_context_fn():
                         # Run the test function once, via the executor hook.
                         # In most cases this will delegate straight to `run(data)`.

From 447bf8b1e9911deea00c20a3eef24fc31ab0cd2f Mon Sep 17 00:00:00 2001
From: Liam DeVoe
Date: Wed, 28 Feb 2024 22:53:59 -0500
Subject: [PATCH 15/43] require post_test_case_hook to be nonnull

---
 .../src/hypothesis/internal/conjecture/engine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
index 1b8b8ebd86..57bbb5af08 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -286,7 +286,10 @@ def test_function(self, data):
         if data.status == Status.INTERESTING:
             if self.settings.backend != "hypothesis":
                 for node in data.ir_tree.leaves():
-                    node.value = data.provider.post_test_case_hook(node.value)
+                    value = data.provider.post_test_case_hook(node.value)
+                    # require providers to return something valid here.
+                    assert value is not None
+                    node.value = value

                 # drive the ir tree through the test function to convert it
                 # to a buffer

From 9c1e375f717e996546987b75c61676d6c3801b59 Mon Sep 17 00:00:00 2001
From: Liam DeVoe
Date: Thu, 29 Feb 2024 01:42:59 -0500
Subject: [PATCH 16/43] remove unnecessary context wrapper

---
 hypothesis-python/src/hypothesis/core.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
index 6bb7342367..2097912f58 100644
--- a/hypothesis-python/src/hypothesis/core.py
+++ b/hypothesis-python/src/hypothesis/core.py
@@ -1178,18 +1178,15 @@ def run_engine(self):
                 assert info._expected_exception is not None
                 try:
                     with with_reporter(fragments.append):
-                        # TODO double check this is necessary.
- with hacky_patchable_run_context_yielding_per_test_case_context() as per_case_context_fn: - self.execute_once( - ran_example, - print_example=not self.is_find, - is_final=True, - expected_failure=( - info._expected_exception, - info._expected_traceback, - ), - per_case_context_fn=per_case_context_fn, - ) + self.execute_once( + ran_example, + print_example=not self.is_find, + is_final=True, + expected_failure=( + info._expected_exception, + info._expected_traceback, + ), + ) except (UnsatisfiedAssumption, StopTest) as e: err = Flaky( "Unreliable assumption: An example which satisfied " From c5e4d2cc9f2995a86f703b6bdf6263c85dfb5406 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Fri, 1 Mar 2024 22:37:06 -0500 Subject: [PATCH 17/43] move hacky context manager to the provider interface --- hypothesis-python/src/hypothesis/core.py | 43 ++++++------------- .../hypothesis/internal/conjecture/data.py | 4 ++ .../hypothesis/internal/conjecture/engine.py | 1 - 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 2097912f58..da4556d599 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -758,15 +758,6 @@ def execute(data, function): return default_executor -# This is hacky solution gives the experimental Crosshair backend a way to wrap -# a context manager around all of the test cases generated for some test function, -# and _that_ context can return a function for a context manager which will be -# wrapped around each individual test case. It's ugly, but it works. -hacky_patchable_run_context_yielding_per_test_case_context = partial( - contextlib.nullcontext, enter_result=contextlib.nullcontext -) - - class StateForActualGivenExecution: def __init__(self, stuff, test, settings, random, wrapped_test): self.test_runner = get_executor(stuff.selfy) @@ -811,7 +802,6 @@ def execute_once( is_final=False, expected_failure=None, example_kwargs=None, - per_case_context_fn=contextlib.nullcontext, ): """Run the test function once, using ``data`` as input. @@ -950,7 +940,7 @@ def run(data): # providers may throw in per_case_context_fn, and we'd like # `result` to still be set in these cases. result = None - with per_case_context_fn(): + with data.provider.per_test_case_context_manager(): # Run the test function once, via the executor hook. # In most cases this will delegate straight to `run(data)`. result = self.test_runner(data, run) @@ -987,9 +977,7 @@ def run(data): ) from exception return result - def _execute_once_for_engine( - self, data: ConjectureData, *, per_case_context_fn: Any - ) -> None: + def _execute_once_for_engine(self, data: ConjectureData) -> None: """Wrapper around ``execute_once`` that intercepts test failure exceptions and single-test control exceptions, and turns them into appropriate method calls to `data` instead. @@ -1013,17 +1001,13 @@ def _execute_once_for_engine( # settrace() contention *not* by our coverage tests. Ah well. 
with Tracer() as tracer: try: - result = self.execute_once( - data, per_case_context_fn=per_case_context_fn - ) + result = self.execute_once(data) if data.status == Status.VALID: self.explain_traces[None].add(frozenset(tracer.branches)) finally: trace = tracer.branches else: - result = self.execute_once( - data, per_case_context_fn=per_case_context_fn - ) + result = self.execute_once(data) if result is not None: fail_health_check( self.settings, @@ -1115,16 +1099,15 @@ def run_engine(self): else: database_key = None - with hacky_patchable_run_context_yielding_per_test_case_context() as ctx_fn: - runner = ConjectureRunner( - partial(self._execute_once_for_engine, per_case_context_fn=ctx_fn), - settings=self.settings, - random=self.random, - database_key=database_key, - ) - # Use the Conjecture engine to run the test function many times - # on different inputs. - runner.run() + runner = ConjectureRunner( + self._execute_once_for_engine, + settings=self.settings, + random=self.random, + database_key=database_key, + ) + # Use the Conjecture engine to run the test function many times + # on different inputs. + runner.run() note_statistics(runner.statistics) deliver_json_blob( { diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 6989727683..ab77089fbf 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -8,6 +8,7 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +import contextlib import math import time from collections import defaultdict @@ -1037,6 +1038,9 @@ def post_test_case_hook(self, value): # I'm not tied to this exact function name or design. return value + def per_test_case_context_manager(self): + return contextlib.nullcontext() + def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: """Return True with probability p (assuming a uniform generator), shrinking towards False. 
If ``forced`` is set to a non-None value, this diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 57bbb5af08..ac6cfb3fec 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -226,7 +226,6 @@ def test_function(self, data): self.debug(self.__pending_call_explanation) self.__pending_call_explanation = None - # assert isinstance(data.observer, TreeRecordingObserver) self.call_count += 1 interrupted = False From e11822c96cc549a4b29ab091bbc0539f24eeda8c Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 00:42:33 -0500 Subject: [PATCH 18/43] Merge branch 'master' into provider-plugins --- .gitignore | 2 + hypothesis-python/.coveragerc | 2 + hypothesis-python/docs/changes.rst | 64 +++++ hypothesis-python/docs/numpy.rst | 2 +- hypothesis-python/src/hypothesis/core.py | 22 +- .../src/hypothesis/extra/array_api.py | 8 +- .../src/hypothesis/extra/ghostwriter.py | 4 +- .../src/hypothesis/extra/numpy.py | 93 +++++-- .../src/hypothesis/internal/compat.py | 5 +- .../hypothesis/internal/conjecture/data.py | 243 ++++++++---------- .../src/hypothesis/internal/filtering.py | 13 +- .../src/hypothesis/internal/intervalsets.py | 3 + hypothesis-python/src/hypothesis/stateful.py | 33 ++- .../strategies/_internal/collections.py | 17 +- .../hypothesis/strategies/_internal/core.py | 8 +- .../hypothesis/strategies/_internal/lazy.py | 36 +-- .../strategies/_internal/strategies.py | 84 ++++-- .../strategies/_internal/strings.py | 165 ++++++++---- .../src/hypothesis/vendor/pretty.py | 2 +- hypothesis-python/src/hypothesis/version.py | 2 +- hypothesis-python/tests/array_api/README.md | 7 +- hypothesis-python/tests/conjecture/common.py | 40 ++- .../tests/conjecture/test_forced.py | 20 ++ hypothesis-python/tests/conjecture/test_ir.py | 90 +++++-- .../tests/conjecture/test_test_data.py | 2 +- hypothesis-python/tests/cover/test_compat.py | 10 + .../tests/cover/test_custom_reprs.py | 2 +- .../tests/cover/test_filter_rewriting.py | 100 ++++++- .../tests/cover/test_health_checks.py | 15 +- .../tests/cover/test_monitoring.py | 41 +++ .../tests/cover/test_stateful.py | 128 ++++++++- .../tests/pandas/test_indexes.py | 2 +- requirements/coverage.txt | 14 +- requirements/fuzzing.txt | 30 +-- requirements/test.txt | 2 +- requirements/tools.in | 1 + requirements/tools.txt | 50 ++-- whole-repo-tests/test_mypy.py | 27 +- whole-repo-tests/test_pyright.py | 87 ++++++- 39 files changed, 1092 insertions(+), 384 deletions(-) create mode 100644 hypothesis-python/tests/cover/test_monitoring.py diff --git a/.gitignore b/.gitignore index 9a0c7886fc..5a404bcaaa 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ .runtimes /hypothesis-python/branch-check /pythonpython3.* +/pythonpypy3.* .pyodide-xbuildenv # python @@ -104,3 +105,4 @@ __pycache__ HypothesisWorks.github.io.iml jekyll.log /website/output/ +/t.py diff --git a/hypothesis-python/.coveragerc b/hypothesis-python/.coveragerc index 89be7942bc..dc89960dc4 100644 --- a/hypothesis-python/.coveragerc +++ b/hypothesis-python/.coveragerc @@ -20,6 +20,7 @@ exclude_lines = pragma: no cover raise NotImplementedError def __repr__ + def _repr_pretty_ def __ne__ def __copy__ def __deepcopy__ @@ -30,3 +31,4 @@ exclude_lines = if sys\.version_info if "[\w\.]+" in sys\.modules: if .+ := sys\.modules\.get\("[\w\.]+"\) + @overload diff --git a/hypothesis-python/docs/changes.rst 
b/hypothesis-python/docs/changes.rst index 00d0fb3491..fb8cfe7a5f 100644 --- a/hypothesis-python/docs/changes.rst +++ b/hypothesis-python/docs/changes.rst @@ -18,6 +18,70 @@ Hypothesis 6.x .. include:: ../RELEASE.rst +.. _v6.98.17: + +-------------------- +6.98.17 - 2024-03-04 +-------------------- + +This patch improves the type annotations in :mod:`hypothesis.extra.numpy`, +which makes inferred types more precise for both :pypi:`mypy` and +:pypi:`pyright`, and fixes some strict-mode errors on the latter. + +Thanks to Jonathan Plasse for reporting and fixing this in :pull:`3889`! + +.. _v6.98.16: + +-------------------- +6.98.16 - 2024-03-04 +-------------------- + +This patch paves the way for future shrinker improvements. There is no user-visible change. + +.. _v6.98.15: + +-------------------- +6.98.15 - 2024-02-29 +-------------------- + +This release adds support for the Array API's `2023.12 release +`_ via the ``api_version`` argument in +:func:`~hypothesis.extra.array_api.make_strategies_namespace`. The API additions +and modifications in the ``2023.12`` spec do not necessitate any changes in the +Hypothesis strategies, hence there is no distinction between a ``2022.12`` and +``2023.12`` strategies namespace. + +.. _v6.98.14: + +-------------------- +6.98.14 - 2024-02-29 +-------------------- + +This patch adjusts the printing of bundle values to correspond +with their names when using stateful testing. + +.. _v6.98.13: + +-------------------- +6.98.13 - 2024-02-27 +-------------------- + +This patch implements filter-rewriting for :func:`~hypothesis.strategies.text` +and :func:`~hypothesis.strategies.binary` with the :meth:`~re.Pattern.search`, +:meth:`~re.Pattern.match`, or :meth:`~re.Pattern.fullmatch` method of a +:func:`re.compile`\ d regex. + +.. _v6.98.12: + +-------------------- +6.98.12 - 2024-02-25 +-------------------- + +This patch implements filter-rewriting for most length filters on some +additional collection types (:issue:`3795`), and fixes several latent +bugs where unsatisfiable or partially-infeasible rewrites could trigger +internal errors. + .. _v6.98.11: -------------------- diff --git a/hypothesis-python/docs/numpy.rst b/hypothesis-python/docs/numpy.rst index 1ac6d33fdb..be1c9c7b4c 100644 --- a/hypothesis-python/docs/numpy.rst +++ b/hypothesis-python/docs/numpy.rst @@ -63,7 +63,7 @@ Hypothesis offers strategies for `Array API `_ adopting libraries in the ``hypothesis.extra.array_api`` package. See :issue:`3037` for more details. If you want to test with :pypi:`CuPy`, :pypi:`Dask`, :pypi:`JAX`, :pypi:`MXNet`, :pypi:`PyTorch `, :pypi:`TensorFlow`, or :pypi:`Xarray` - -or just ``numpy.array_api`` - this is the extension for you! +or just :pypi:`NumPy` - this is the extension for you! .. autofunction:: hypothesis.extra.array_api.make_strategies_namespace diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index da4556d599..e038f30565 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -108,6 +108,7 @@ repr_call, ) from hypothesis.internal.scrutineer import ( + MONITORING_TOOL_ID, Trace, Tracer, explanatory_lines, @@ -987,8 +988,27 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None: """ trace: Trace = set() try: + # this is actually covered by our tests, but only on >= 3.12. 
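+            # sys.monitoring (new in 3.12) exposes a fixed set of tool ids, so
+            # we can only trace when our MONITORING_TOOL_ID slot is still free;
+            # otherwise we warn and fall back to running without tracing.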
+ if ( + sys.version_info[:2] >= (3, 12) + and sys.monitoring.get_tool(MONITORING_TOOL_ID) is not None + ): # pragma: no cover + warnings.warn( + "avoiding tracing test function because tool id " + f"{MONITORING_TOOL_ID} is already taken by tool " + f"{sys.monitoring.get_tool(MONITORING_TOOL_ID)}.", + HypothesisWarning, + # I'm not sure computing a correct stacklevel is reasonable + # given the number of entry points here. + stacklevel=1, + ) + _can_trace = ( - sys.gettrace() is None or sys.version_info[:2] >= (3, 12) + (sys.version_info[:2] < (3, 12) and sys.gettrace() is None) + or ( + sys.version_info[:2] >= (3, 12) + and sys.monitoring.get_tool(MONITORING_TOOL_ID) is None + ) ) and not PYPY _trace_obs = TESTCASE_CALLBACKS and OBSERVABILITY_COLLECT_COVERAGE _trace_failure = ( diff --git a/hypothesis-python/src/hypothesis/extra/array_api.py b/hypothesis-python/src/hypothesis/extra/array_api.py index d62418f317..ce0993ab3b 100644 --- a/hypothesis-python/src/hypothesis/extra/array_api.py +++ b/hypothesis-python/src/hypothesis/extra/array_api.py @@ -69,10 +69,10 @@ ] -RELEASED_VERSIONS = ("2021.12", "2022.12") +RELEASED_VERSIONS = ("2021.12", "2022.12", "2023.12") NOMINAL_VERSIONS = (*RELEASED_VERSIONS, "draft") assert sorted(NOMINAL_VERSIONS) == list(NOMINAL_VERSIONS) # sanity check -NominalVersion = Literal["2021.12", "2022.12", "draft"] +NominalVersion = Literal["2021.12", "2022.12", "2023.12", "draft"] assert get_args(NominalVersion) == NOMINAL_VERSIONS # sanity check @@ -1091,7 +1091,7 @@ def complex_dtypes( np = Mock() else: - np = None + np = None # type: ignore[assignment] if np is not None: class FloatInfo(NamedTuple): @@ -1112,7 +1112,7 @@ def mock_finfo(dtype: DataType) -> FloatInfo: introduced it in v1.21.1, so we just use the equivalent tiny attribute to keep mocking with older versions working. 
""" - _finfo = np.finfo(dtype) + _finfo = np.finfo(dtype) # type: ignore[call-overload] return FloatInfo( int(_finfo.bits), float(_finfo.eps), diff --git a/hypothesis-python/src/hypothesis/extra/ghostwriter.py b/hypothesis-python/src/hypothesis/extra/ghostwriter.py index 8917d5bd87..2854b48c29 100644 --- a/hypothesis-python/src/hypothesis/extra/ghostwriter.py +++ b/hypothesis-python/src/hypothesis/extra/ghostwriter.py @@ -122,7 +122,7 @@ from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies from hypothesis.strategies._internal.strategies import ( FilteredStrategy, - MappedSearchStrategy, + MappedStrategy, OneOfStrategy, SampledFromStrategy, ) @@ -627,7 +627,7 @@ def _imports_for_strategy(strategy): strategy = unwrap_strategies(strategy) # Get imports for s.map(f), s.filter(f), s.flatmap(f), including both s and f - if isinstance(strategy, MappedSearchStrategy): + if isinstance(strategy, MappedStrategy): imports |= _imports_for_strategy(strategy.mapped_strategy) imports |= _imports_for_object(strategy.pack) if isinstance(strategy, FilteredStrategy): diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index 29d73f76be..c76edcf9a9 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -21,6 +21,8 @@ Type, TypeVar, Union, + cast, + overload, ) import numpy as np @@ -50,7 +52,7 @@ from hypothesis.strategies._internal.numbers import Real from hypothesis.strategies._internal.strategies import ( Ex, - MappedSearchStrategy, + MappedStrategy, T, check_strategy, ) @@ -136,7 +138,7 @@ def from_dtype( kwargs = {k: v for k, v in locals().items() if k != "dtype" and v is not None} # Compound datatypes, eg 'f4,f4,f4' - if dtype.names is not None: + if dtype.names is not None and dtype.fields is not None: # mapping np.void.type over a strategy is nonsense, so return now. subs = [from_dtype(dtype.fields[name][0], **kwargs) for name in dtype.names] return st.tuples(*subs) @@ -164,7 +166,7 @@ def compat_kw(*args, **kw): result: st.SearchStrategy[Any] = st.booleans() elif dtype.kind == "f": result = st.floats( - width=min(8 * dtype.itemsize, 64), + width=cast(Literal[16, 32, 64], min(8 * dtype.itemsize, 64)), **compat_kw( "min_value", "max_value", @@ -177,7 +179,9 @@ def compat_kw(*args, **kw): ) elif dtype.kind == "c": result = st.complex_numbers( - width=min(8 * dtype.itemsize, 128), # convert from bytes to bits + width=cast( + Literal[32, 64, 128], min(8 * dtype.itemsize, 128) + ), # convert from bytes to bits **compat_kw( "min_magnitude", "max_magnitude", @@ -411,6 +415,31 @@ def fill_for(elements, unique, fill, name=""): D = TypeVar("D", bound="DTypeLike") +G = TypeVar("G", bound="np.generic") + + +@overload +@defines_strategy(force_reusable_values=True) +def arrays( + dtype: Union["np.dtype[G]", st.SearchStrategy["np.dtype[G]"]], + shape: Union[int, st.SearchStrategy[int], Shape, st.SearchStrategy[Shape]], + *, + elements: Optional[Union[st.SearchStrategy[Any], Mapping[str, Any]]] = None, + fill: Optional[st.SearchStrategy[Any]] = None, + unique: bool = False, +) -> "st.SearchStrategy[NDArray[G]]": ... 
+ + +@overload +@defines_strategy(force_reusable_values=True) +def arrays( + dtype: Union[D, st.SearchStrategy[D]], + shape: Union[int, st.SearchStrategy[int], Shape, st.SearchStrategy[Shape]], + *, + elements: Optional[Union[st.SearchStrategy[Any], Mapping[str, Any]]] = None, + fill: Optional[st.SearchStrategy[Any]] = None, + unique: bool = False, +) -> "st.SearchStrategy[NDArray[Any]]": ... @defines_strategy(force_reusable_values=True) @@ -421,7 +450,7 @@ def arrays( elements: Optional[Union[st.SearchStrategy[Any], Mapping[str, Any]]] = None, fill: Optional[st.SearchStrategy[Any]] = None, unique: bool = False, -) -> "st.SearchStrategy[NDArray[D]]": +) -> "st.SearchStrategy[NDArray[Any]]": r"""Returns a strategy for generating :class:`numpy:numpy.ndarray`\ s. * ``dtype`` may be any valid input to :class:`~numpy:numpy.dtype` @@ -498,7 +527,7 @@ def arrays( lambda s: arrays(dtype, s, elements=elements, fill=fill, unique=unique) ) # From here on, we're only dealing with values and it's relatively simple. - dtype = np.dtype(dtype) + dtype = np.dtype(dtype) # type: ignore[arg-type,assignment] assert isinstance(dtype, np.dtype) # help mypy out a bit... if elements is None or isinstance(elements, Mapping): if dtype.kind in ("m", "M") and "[" not in dtype.str: @@ -516,7 +545,7 @@ def arrays( # If there's a redundant cast to the requested dtype, remove it. This unlocks # optimizations such as fast unique sampled_from, and saves some time directly too. unwrapped = unwrap_strategies(elements) - if isinstance(unwrapped, MappedSearchStrategy) and unwrapped.pack == dtype.type: + if isinstance(unwrapped, MappedStrategy) and unwrapped.pack == dtype.type: elements = unwrapped.mapped_strategy if isinstance(shape, int): shape = (shape,) @@ -554,8 +583,8 @@ def inner(*args, **kwargs): @defines_dtype_strategy -def boolean_dtypes() -> st.SearchStrategy[np.dtype]: - return st.just("?") +def boolean_dtypes() -> st.SearchStrategy["np.dtype[np.bool_]"]: + return st.just("?") # type: ignore[arg-type] def dtype_factory(kind, sizes, valid_sizes, endianness): @@ -592,7 +621,7 @@ def unsigned_integer_dtypes( *, endianness: str = "?", sizes: Sequence[Literal[8, 16, 32, 64]] = (8, 16, 32, 64), -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.unsignedinteger[Any]]"]: """Return a strategy for unsigned integer dtypes. endianness may be ``<`` for little-endian, ``>`` for big-endian, @@ -610,7 +639,7 @@ def integer_dtypes( *, endianness: str = "?", sizes: Sequence[Literal[8, 16, 32, 64]] = (8, 16, 32, 64), -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.signedinteger[Any]]"]: """Return a strategy for signed integer dtypes. endianness and sizes are treated as for @@ -624,7 +653,7 @@ def floating_dtypes( *, endianness: str = "?", sizes: Sequence[Literal[16, 32, 64, 96, 128]] = (16, 32, 64), -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.floating[Any]]"]: """Return a strategy for floating-point dtypes. sizes is the size in bits of floating-point number. Some machines support @@ -642,7 +671,7 @@ def complex_number_dtypes( *, endianness: str = "?", sizes: Sequence[Literal[64, 128, 192, 256]] = (64, 128), -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.complexfloating[Any, Any]]"]: """Return a strategy for complex-number dtypes. 
sizes is the total size in bits of a complex number, which consists @@ -681,7 +710,7 @@ def validate_time_slice(max_period, min_period): @defines_dtype_strategy def datetime64_dtypes( *, max_period: str = "Y", min_period: str = "ns", endianness: str = "?" -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.datetime64]"]: """Return a strategy for datetime64 dtypes, with various precisions from year to attosecond.""" return dtype_factory( @@ -695,7 +724,7 @@ def datetime64_dtypes( @defines_dtype_strategy def timedelta64_dtypes( *, max_period: str = "Y", min_period: str = "ns", endianness: str = "?" -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.timedelta64]"]: """Return a strategy for timedelta64 dtypes, with various precisions from year to attosecond.""" return dtype_factory( @@ -709,7 +738,7 @@ def timedelta64_dtypes( @defines_dtype_strategy def byte_string_dtypes( *, endianness: str = "?", min_len: int = 1, max_len: int = 16 -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.bytes_]"]: """Return a strategy for generating bytestring dtypes, of various lengths and byteorder. @@ -724,7 +753,7 @@ def byte_string_dtypes( @defines_dtype_strategy def unicode_string_dtypes( *, endianness: str = "?", min_len: int = 1, max_len: int = 16 -) -> st.SearchStrategy[np.dtype]: +) -> st.SearchStrategy["np.dtype[np.str_]"]: """Return a strategy for generating unicode string dtypes, of various lengths and byteorder. @@ -771,7 +800,7 @@ def array_dtypes( elements |= st.tuples( name_titles, subtype_strategy, array_shapes(max_dims=2, max_side=2) ) - return st.lists( + return st.lists( # type: ignore[return-value] elements=elements, min_size=min_size, max_size=max_size, @@ -948,13 +977,35 @@ def basic_indices( ) +I = TypeVar("I", bound=np.integer) + + +@overload +@defines_strategy() +def integer_array_indices( + shape: Shape, + *, + result_shape: st.SearchStrategy[Shape] = array_shapes(), +) -> "st.SearchStrategy[Tuple[NDArray[np.signedinteger[Any]], ...]]": ... + + +@overload +@defines_strategy() +def integer_array_indices( + shape: Shape, + *, + result_shape: st.SearchStrategy[Shape] = array_shapes(), + dtype: "np.dtype[I]", +) -> "st.SearchStrategy[Tuple[NDArray[I], ...]]": ... + + @defines_strategy() def integer_array_indices( shape: Shape, *, result_shape: st.SearchStrategy[Shape] = array_shapes(), - dtype: D = np.dtype(int), -) -> "st.SearchStrategy[Tuple[NDArray[D], ...]]": + dtype: "np.dtype[I] | np.dtype[np.signedinteger[Any]]" = np.dtype(int), +) -> "st.SearchStrategy[Tuple[NDArray[I], ...]]": """Return a search strategy for tuples of integer-arrays that, when used to index into an array of shape ``shape``, given an array whose shape was drawn from ``result_shape``. 
@@ -1146,7 +1197,7 @@ def _from_type(thing: Type[Ex]) -> Optional[st.SearchStrategy[Ex]]: if real_thing in [np.ndarray, _SupportsArray]: dtype, shape = _dtype_and_shape_from_args(args) - return arrays(dtype, shape) + return arrays(dtype, shape) # type: ignore[return-value] # We didn't find a type to resolve, continue return None diff --git a/hypothesis-python/src/hypothesis/internal/compat.py b/hypothesis-python/src/hypothesis/internal/compat.py index 24e30b112b..f979a075c2 100644 --- a/hypothesis-python/src/hypothesis/internal/compat.py +++ b/hypothesis-python/src/hypothesis/internal/compat.py @@ -50,7 +50,10 @@ def add_note(exc, note): exc.add_note(note) except AttributeError: if not hasattr(exc, "__notes__"): - exc.__notes__ = [] + try: + exc.__notes__ = [] + except AttributeError: + return # give up, might be e.g. a frozen dataclass exc.__notes__.append(note) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index ab77089fbf..da114da772 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -204,6 +204,8 @@ def structural_coverage(label: int) -> StructuralCoverageTag: FLOAT_INIT_LOGIC_CACHE = LRUReusedCache(4096) +POOLED_KWARGS_CACHE = LRUReusedCache(4096) + DRAW_STRING_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" @@ -335,6 +337,7 @@ def __init__(self, examples: "Examples"): self.bytes_read = 0 self.example_count = 0 self.block_count = 0 + self.ir_node_count = 0 def run(self) -> Any: """Rerun the test case with this visitor and return the @@ -348,6 +351,10 @@ def run(self) -> Any: self.block(self.block_count) self.block_count += 1 self.__pop(discarded=False) + elif record == IR_NODE_RECORD: + data = self.examples.ir_nodes[self.ir_node_count] + self.ir_node(data) + self.ir_node_count += 1 elif record >= START_EXAMPLE_RECORD: self.__push(record - START_EXAMPLE_RECORD) else: @@ -388,6 +395,9 @@ def stop_example(self, i: int, *, discarded: bool) -> None: index of the example and ``discarded`` being ``True`` if ``stop_example`` was called with ``discard=True``.""" + def ir_node(self, node: "IRNode") -> None: + """Called when an ir node is drawn.""" + def finish(self) -> Any: return self.result @@ -420,6 +430,8 @@ def lazy_calculate(self: "Examples") -> IntList: STOP_EXAMPLE_NO_DISCARD_RECORD = 2 START_EXAMPLE_RECORD = 3 +IR_NODE_RECORD = calc_label_from_name("ir draw record") + class ExampleRecord: """Records the series of ``start_example``, ``stop_example``, and @@ -436,10 +448,18 @@ def __init__(self) -> None: self.labels = [DRAW_BYTES_LABEL] self.__index_of_labels: "Optional[Dict[int, int]]" = {DRAW_BYTES_LABEL: 0} self.trail = IntList() + self.ir_nodes: List[IRNode] = [] def freeze(self) -> None: self.__index_of_labels = None + def record_ir_draw(self, ir_type, value, *, kwargs, was_forced): + self.trail.append(IR_NODE_RECORD) + node = IRNode( + ir_type=ir_type, value=value, kwargs=kwargs, was_forced=was_forced + ) + self.ir_nodes.append(node) + def start_example(self, label: int) -> None: assert self.__index_of_labels is not None try: @@ -455,7 +475,7 @@ def stop_example(self, *, discard: bool) -> None: else: self.trail.append(STOP_EXAMPLE_NO_DISCARD_RECORD) - def draw_bits(self, n: int, forced: Optional[int]) -> None: + def draw_bits(self) -> None: self.trail.append(DRAW_BITS_RECORD) @@ -472,6 +492,7 @@ class Examples: def __init__(self, record: ExampleRecord, blocks: "Blocks") -> None: self.trail = 
record.trail + self.ir_nodes = record.ir_nodes self.labels = record.labels self.__length = ( self.trail.count(STOP_EXAMPLE_DISCARD_RECORD) @@ -557,6 +578,15 @@ def start_example(self, i: int, label_index: int) -> None: depths: IntList = calculated_example_property(_depths) + class _ir_tree_nodes(ExampleProperty): + def begin(self): + self.result = [] + + def ir_node(self, ir_node): + self.result.append(ir_node) + + ir_tree_nodes: "List[IRNode]" = calculated_example_property(_ir_tree_nodes) + class _label_indices(ExampleProperty): def start_example(self, i: int, label_index: int) -> None: self.result[i] = label_index @@ -857,129 +887,37 @@ def kill_branch(self) -> None: """Mark this part of the tree as not worth re-exploring.""" def draw_integer( - self, value: int, *, was_forced: bool, kwargs: IntegerKWargs + self, value: int, *, kwargs: IntegerKWargs, was_forced: bool ) -> None: pass def draw_float( - self, value: float, *, was_forced: bool, kwargs: FloatKWargs + self, value: float, *, kwargs: FloatKWargs, was_forced: bool ) -> None: pass def draw_string( - self, value: str, *, was_forced: bool, kwargs: StringKWargs + self, value: str, *, kwargs: StringKWargs, was_forced: bool ) -> None: pass def draw_bytes( - self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs + self, value: bytes, *, kwargs: BytesKWargs, was_forced: bool ) -> None: pass def draw_boolean( - self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs + self, value: bool, *, kwargs: BooleanKWargs, was_forced: bool ) -> None: pass -class IRTreeLeaf: - def __init__(self, *, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType): - self.ir_type = ir_type - self.value = value - self.kwargs = kwargs - - def _repr_pretty_(self, p, cycle): - assert cycle is False - p.text(f"{self.ir_type} {self.value} {self.kwargs}") - - -class IRTreeNode: - def __init__( - self, - *, - label: int, - parent: Optional["IRTreeNode"], - index_in_parent: Optional[int] = None, - ): - self.parent = parent - self.index_in_parent = index_in_parent - self.label = label - self.children: List[Union[IRTreeNode, IRTreeLeaf]] = [] - - def _repr_pretty_(self, p, cycle): - assert cycle is False - p.text(str(self.label)) - with p.indent(1): - for child in self.children: - p.break_() - p.pretty(child) - - -class IRTree: - def __init__(self): - # only None when the top example hasn't been started yet - self.root: Optional[IRTreeNode] = None - self.current_node: Optional[IRTreeNode] = None - - def _repr_pretty_(self, p, cycle): - assert cycle is False - p.pretty(self.root) - - def leaves(self) -> List[IRTreeLeaf]: - def _leaves(node): - leaves = [] - for child in node.children: - if isinstance(child, IRTreeNode): - leaves += _leaves(child) - else: - assert isinstance(child, IRTreeLeaf) - leaves.append(child) - return leaves - - return _leaves(self.root) - - def start_example(self, label): - if self.root is None: - assert label == TOP_LABEL - self.root = IRTreeNode(label=TOP_LABEL, parent=None, index_in_parent=None) - self.current_node = self.root - return - - node = IRTreeNode( - label=label, - parent=self.current_node, - index_in_parent=len(self.current_node.children) + 1, - ) - self.current_node.children.append(node) - self.current_node = node - - def stop_example(self): - if self.current_node.parent is None: - assert self.current_node.label == TOP_LABEL - - self.current_node = self.current_node.parent - - def draw_value( - self, ir_type: IRTypeName, value: IRType, kwargs: IRKWargsType - ) -> None: - assert self.current_node is not None - leaf = 
IRTreeLeaf(ir_type=ir_type, value=value, kwargs=kwargs) - self.current_node.children.append(leaf) - - def draw_integer(self, value: int, kwargs: IntegerKWargs) -> None: - self.draw_value("integer", value, kwargs) - - def draw_float(self, value: float, kwargs: FloatKWargs) -> None: - self.draw_value("float", value, kwargs) - - def draw_string(self, value: str, kwargs: StringKWargs) -> None: - self.draw_value("string", value, kwargs) - - def draw_bytes(self, value: bytes, kwargs: BytesKWargs) -> None: - self.draw_value("bytes", value, kwargs) - - def draw_boolean(self, value: bool, kwargs: BooleanKWargs) -> None: - self.draw_value("boolean", value, kwargs) +@attr.s(slots=True) +class IRNode: + ir_type: IRTypeName = attr.ib() + value: IRType = attr.ib() + kwargs: IRKWargsType = attr.ib() + was_forced: bool = attr.ib() @dataclass_transform() @@ -1254,7 +1192,9 @@ def draw_float( result = self._draw_float( forced_sign_bit=forced_sign_bit, forced=forced ) - if math.copysign(1.0, result) == -1: + if allow_nan and math.isnan(result): + clamped = result + elif math.copysign(1.0, result) == -1: assert neg_clamper is not None clamped = -neg_clamper(-result) else: @@ -1722,21 +1662,27 @@ def draw_integer( if forced is not None and max_value is not None: assert forced <= max_value - kwargs: IntegerKWargs = { - "min_value": min_value, - "max_value": max_value, - "weights": weights, - "shrink_towards": shrink_towards, - } + kwargs: IntegerKWargs = self._pooled_kwargs( + "integer", + { + "min_value": min_value, + "max_value": max_value, + "weights": weights, + "shrink_towards": shrink_towards, + }, + ) + if self.ir_tree_leaves is not None and observe: forced = self._pop_ir_tree_value("integer", kwargs) value = self.provider.draw_integer(**kwargs, forced=forced) if observe: self.observer.draw_integer( - value, was_forced=forced is not None, kwargs=kwargs + value, kwargs=kwargs, was_forced=forced is not None + ) + self.__example_record.record_ir_draw( + "integer", value, kwargs=kwargs, was_forced=forced is not None ) - self.ir_tree.draw_integer(value, kwargs) return value def draw_float( @@ -1763,12 +1709,16 @@ def draw_float( sign_aware_lte(min_value, forced) and sign_aware_lte(forced, max_value) ) - kwargs: FloatKWargs = { - "min_value": min_value, - "max_value": max_value, - "allow_nan": allow_nan, - "smallest_nonzero_magnitude": smallest_nonzero_magnitude, - } + kwargs: FloatKWargs = self._pooled_kwargs( + "float", + { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + }, + ) + if self.ir_tree_leaves is not None and observe: forced = self._pop_ir_tree_value("float", kwargs) @@ -1777,7 +1727,9 @@ def draw_float( self.observer.draw_float( value, kwargs=kwargs, was_forced=forced is not None ) - self.ir_tree.draw_float(value, kwargs) + self.__example_record.record_ir_draw( + "float", value, kwargs=kwargs, was_forced=forced is not None + ) return value def draw_string( @@ -1791,11 +1743,14 @@ def draw_string( ) -> str: assert forced is None or min_size <= len(forced) - kwargs: StringKWargs = { - "intervals": intervals, - "min_size": min_size, - "max_size": max_size, - } + kwargs: StringKWargs = self._pooled_kwargs( + "string", + { + "intervals": intervals, + "min_size": min_size, + "max_size": max_size, + }, + ) if self.ir_tree_leaves is not None and observe: forced = self._pop_ir_tree_value("string", kwargs) @@ -1804,7 +1759,9 @@ def draw_string( self.observer.draw_string( value, kwargs=kwargs, was_forced=forced is not 
None ) - self.ir_tree.draw_string(value, kwargs) + self.__example_record.record_ir_draw( + "string", value, kwargs=kwargs, was_forced=forced is not None + ) return value def draw_bytes( @@ -1818,7 +1775,7 @@ def draw_bytes( assert forced is None or len(forced) == size assert size >= 0 - kwargs: BytesKWargs = {"size": size} + kwargs: BytesKWargs = self._pooled_kwargs("bytes", {"size": size}) if self.ir_tree_leaves is not None and observe: forced = self._pop_ir_tree_value("bytes", kwargs) @@ -1828,7 +1785,9 @@ def draw_bytes( self.observer.draw_bytes( value, kwargs=kwargs, was_forced=forced is not None ) - self.ir_tree.draw_bytes(value, kwargs) + self.__example_record.record_ir_draw( + "bytes", value, kwargs=kwargs, was_forced=forced is not None + ) return value def draw_boolean( @@ -1844,7 +1803,7 @@ def draw_boolean( if forced is False: assert p < (1 - 2 ** (-64)) - kwargs: BooleanKWargs = {"p": p} + kwargs: BooleanKWargs = self._pooled_kwargs("boolean", {"p": p}) if self.ir_tree_leaves is not None and observe: forced = self._pop_ir_tree_value("boolean", kwargs) @@ -1854,9 +1813,31 @@ def draw_boolean( self.observer.draw_boolean( value, kwargs=kwargs, was_forced=forced is not None ) - self.ir_tree.draw_boolean(value, kwargs) + self.__example_record.record_ir_draw( + "boolean", value, kwargs=kwargs, was_forced=forced is not None + ) return value + def _pooled_kwargs(self, ir_type, kwargs): + """Memoize common dictionary objects to reduce memory pressure.""" + key = [] + for k, v in kwargs.items(): + if ir_type == "float" and k in ["min_value", "max_value"]: + # handle -0.0 vs 0.0, etc. + v = float_to_int(v) + elif ir_type == "integer" and k == "weights": + # make hashable + v = v if v is None else tuple(v) + key.append((k, v)) + + key = (ir_type, *sorted(key)) + + try: + return POOLED_KWARGS_CACHE[key] + except KeyError: + POOLED_KWARGS_CACHE[key] = kwargs + return kwargs + def _pop_ir_tree_value(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRType: assert self.ir_tree_leaves is not None leaf = self.ir_tree_leaves[self.ir_tree_leaves_index] @@ -2090,7 +2071,7 @@ def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: buf = bytes(buf) result = int_from_bytes(buf) - self.__example_record.draw_bits(n, forced) + self.__example_record.draw_bits() initial = self.index diff --git a/hypothesis-python/src/hypothesis/internal/filtering.py b/hypothesis-python/src/hypothesis/internal/filtering.py index 4ba92b1da8..f352e9cb6d 100644 --- a/hypothesis-python/src/hypothesis/internal/filtering.py +++ b/hypothesis-python/src/hypothesis/internal/filtering.py @@ -33,7 +33,10 @@ from hypothesis.internal.compat import ceil, floor from hypothesis.internal.floats import next_down, next_up -from hypothesis.internal.reflection import extract_lambda_source +from hypothesis.internal.reflection import ( + extract_lambda_source, + get_pretty_function_description, +) Ex = TypeVar("Ex") Predicate = Callable[[Ex], bool] @@ -64,6 +67,10 @@ class ConstructivePredicate(NamedTuple): def unchanged(cls, predicate: Predicate) -> "ConstructivePredicate": return cls({}, predicate) + def __repr__(self) -> str: + fn = get_pretty_function_description(self.predicate) + return f"{self.__class__.__name__}(kwargs={self.kwargs!r}, predicate={fn})" + ARG = object() @@ -147,8 +154,8 @@ def merge_preds(*con_predicates: ConstructivePredicate) -> ConstructivePredicate elif kw["max_value"] == base["max_value"]: base["exclude_max"] |= kw.get("exclude_max", False) - has_len = {"len" in kw for kw, _ in con_predicates} - assert 
len(has_len) == 1, "can't mix numeric with length constraints" + has_len = {"len" in kw for kw, _ in con_predicates if kw} + assert len(has_len) <= 1, "can't mix numeric with length constraints" if has_len == {True}: base["len"] = True diff --git a/hypothesis-python/src/hypothesis/internal/intervalsets.py b/hypothesis-python/src/hypothesis/internal/intervalsets.py index 88162b6332..e48802ee77 100644 --- a/hypothesis-python/src/hypothesis/internal/intervalsets.py +++ b/hypothesis-python/src/hypothesis/internal/intervalsets.py @@ -102,6 +102,9 @@ def __and__(self, other): def __eq__(self, other): return isinstance(other, IntervalSet) and (other.intervals == self.intervals) + def __hash__(self): + return hash(self.intervals) + def union(self, other): """Merge two sequences of intervals into a single tuple of intervals. diff --git a/hypothesis-python/src/hypothesis/stateful.py b/hypothesis-python/src/hypothesis/stateful.py index 60cd92721c..58a2277161 100644 --- a/hypothesis-python/src/hypothesis/stateful.py +++ b/hypothesis-python/src/hypothesis/stateful.py @@ -15,7 +15,7 @@ Notably, the set of steps available at any point may depend on the execution to date. """ - +import collections import inspect from copy import copy from functools import lru_cache @@ -268,7 +268,8 @@ def __init__(self) -> None: if not self.rules(): raise InvalidDefinition(f"Type {type(self).__name__} defines no rules") self.bundles: Dict[str, list] = {} - self.name_counter = 1 + self.names_counters: collections.Counter = collections.Counter() + self.names_list: list[str] = [] self.names_to_values: Dict[str, Any] = {} self.__stream = StringIO() self.__printer = RepresentationPrinter( @@ -301,15 +302,16 @@ def _pretty_print(self, value): def __repr__(self): return f"{type(self).__name__}({nicerepr(self.bundles)})" - def _new_name(self): - result = f"v{self.name_counter}" - self.name_counter += 1 + def _new_name(self, target): + result = f"{target}_{self.names_counters[target]}" + self.names_counters[target] += 1 + self.names_list.append(result) return result def _last_names(self, n): - assert self.name_counter > n - count = self.name_counter - return [f"v{i}" for i in range(count - n, count)] + len_ = len(self.names_list) + assert len_ >= n + return self.names_list[len_ - n :] def bundle(self, name): return self.bundles.setdefault(name, []) @@ -364,7 +366,8 @@ def _repr_step(self, rule, data, result): if len(result.values) == 1: output_assignment = f"({self._last_names(1)[0]},) = " elif result.values: - output_names = self._last_names(len(result.values)) + number_of_last_names = len(rule.targets) * len(result.values) + output_names = self._last_names(number_of_last_names) output_assignment = ", ".join(output_names) + " = " else: output_assignment = self._last_names(1)[0] + " = " @@ -372,12 +375,14 @@ def _repr_step(self, rule, data, result): return f"{output_assignment}state.{rule.function.__name__}({args})" def _add_result_to_targets(self, targets, result): - name = self._new_name() - self.__printer.singleton_pprinters.setdefault( - id(result), lambda obj, p, cycle: p.text(name) - ) - self.names_to_values[name] = result for target in targets: + name = self._new_name(target) + + def printer(obj, p, cycle, name=name): + return p.text(name) + + self.__printer.singleton_pprinters.setdefault(id(result), printer) + self.names_to_values[name] = result self.bundles.setdefault(target, []).append(VarReference(name)) def check_invariants(self, settings, output, runtimes): diff --git 
a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py index e8f8f21ba4..75de4a82ec 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py @@ -23,7 +23,7 @@ T4, T5, Ex, - MappedSearchStrategy, + MappedStrategy, SearchStrategy, T, check_strategy, @@ -211,6 +211,9 @@ def filter(self, condition): new = copy.copy(self) new.min_size = max(self.min_size, kwargs.get("min_value", self.min_size)) new.max_size = min(self.max_size, kwargs.get("max_value", self.max_size)) + # Unsatisfiable filters are easiest to understand without rewriting. + if new.min_size > new.max_size: + return SearchStrategy.filter(self, condition) # Recompute average size; this is cheaper than making it into a property. new.average_size = min( max(new.min_size * 2, new.min_size + 5), @@ -302,7 +305,7 @@ def do_draw(self, data): return result -class FixedKeysDictStrategy(MappedSearchStrategy): +class FixedKeysDictStrategy(MappedStrategy): """A strategy which produces dicts with a fixed set of keys, given a strategy for each of their equivalent values. @@ -311,9 +314,12 @@ class FixedKeysDictStrategy(MappedSearchStrategy): """ def __init__(self, strategy_dict): - self.dict_type = type(strategy_dict) + dict_type = type(strategy_dict) self.keys = tuple(strategy_dict.keys()) - super().__init__(strategy=TupleStrategy(strategy_dict[k] for k in self.keys)) + super().__init__( + strategy=TupleStrategy(strategy_dict[k] for k in self.keys), + pack=lambda value: dict_type(zip(self.keys, value)), + ) def calc_is_empty(self, recur): return recur(self.mapped_strategy) @@ -321,9 +327,6 @@ def calc_is_empty(self, recur): def __repr__(self): return f"FixedKeysDictStrategy({self.keys!r}, {self.mapped_strategy!r})" - def pack(self, value): - return self.dict_type(zip(self.keys, value)) - class FixedAndOptionalKeysDictStrategy(SearchStrategy): """A strategy which produces dicts with a fixed set of keys, given a diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py index 1efe75caec..83edacbfef 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/core.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/core.py @@ -134,7 +134,7 @@ one_of, ) from hypothesis.strategies._internal.strings import ( - FixedSizeBytes, + BytesStrategy, OneCharStringStrategy, TextStrategy, ) @@ -963,11 +963,7 @@ def binary( values. """ check_valid_sizes(min_size, max_size) - if min_size == max_size: - return FixedSizeBytes(min_size) - return lists( - integers(min_value=0, max_value=255), min_size=min_size, max_size=max_size - ).map(bytes) + return BytesStrategy(min_size, max_size) @cacheable diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py b/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py index d6bb13c7c1..8f887293e6 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py @@ -61,10 +61,6 @@ def unwrap_strategies(s): assert unwrap_depth >= 0 -def _repr_filter(condition): - return f".filter({get_pretty_function_description(condition)})" - - class LazyStrategy(SearchStrategy): """A strategy which is defined purely by conversion to and from another strategy. 
@@ -72,14 +68,14 @@ class LazyStrategy(SearchStrategy): Its parameter and distribution come from that other strategy. """ - def __init__(self, function, args, kwargs, filters=(), *, force_repr=None): + def __init__(self, function, args, kwargs, *, transforms=(), force_repr=None): super().__init__() self.__wrapped_strategy = None self.__representation = force_repr self.function = function self.__args = args self.__kwargs = kwargs - self.__filters = filters + self._transformations = transforms @property def supports_find(self): @@ -115,23 +111,28 @@ def wrapped_strategy(self): self.__wrapped_strategy = self.function( *unwrapped_args, **unwrapped_kwargs ) - for f in self.__filters: - self.__wrapped_strategy = self.__wrapped_strategy.filter(f) + for method, fn in self._transformations: + self.__wrapped_strategy = getattr(self.__wrapped_strategy, method)(fn) return self.__wrapped_strategy - def filter(self, condition): - try: - repr_ = f"{self!r}{_repr_filter(condition)}" - except Exception: - repr_ = None - return LazyStrategy( + def __with_transform(self, method, fn): + repr_ = self.__representation + if repr_: + repr_ = f"{repr_}.{method}({get_pretty_function_description(fn)})" + return type(self)( self.function, self.__args, self.__kwargs, - (*self.__filters, condition), + transforms=(*self._transformations, (method, fn)), force_repr=repr_, ) + def map(self, pack): + return self.__with_transform("map", pack) + + def filter(self, condition): + return self.__with_transform("filter", condition) + def do_validate(self): w = self.wrapped_strategy assert isinstance(w, SearchStrategy), f"{self!r} returned non-strategy {w!r}" @@ -156,7 +157,10 @@ def __repr__(self): } self.__representation = repr_call( self.function, _args, kwargs_for_repr, reorder=False - ) + "".join(map(_repr_filter, self.__filters)) + ) + "".join( + f".{method}({get_pretty_function_description(fn)})" + for method, fn in self._transformations + ) return self.__representation def do_draw(self, data): diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py index af2fa72937..46d4005cdb 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py @@ -11,6 +11,7 @@ import sys import warnings from collections import abc, defaultdict +from functools import lru_cache from random import shuffle from typing import ( Any, @@ -60,7 +61,7 @@ calculating = UniqueIdentifier("calculating") MAPPED_SEARCH_STRATEGY_DO_DRAW_LABEL = calc_label_from_name( - "another attempted draw in MappedSearchStrategy" + "another attempted draw in MappedStrategy" ) FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL = calc_label_from_name( @@ -346,7 +347,7 @@ def map(self, pack: Callable[[Ex], T]) -> "SearchStrategy[T]": """ if is_identity_function(pack): return self # type: ignore # Mypy has no way to know that `Ex == T` - return MappedSearchStrategy(pack=pack, strategy=self) + return MappedStrategy(self, pack=pack) def flatmap( self, expand: Callable[[Ex], "SearchStrategy[T]"] @@ -468,9 +469,6 @@ class SampledFromStrategy(SearchStrategy): """A strategy which samples from a set of elements. This is essentially equivalent to using a OneOfStrategy over Just strategies but may be more efficient and convenient. - - The conditional distribution chooses uniformly at random from some - non-empty subset of the elements. 
""" _MAX_FILTER_CALLS = 10_000 @@ -521,7 +519,10 @@ def _transform(self, element): # Used in UniqueSampledListStrategy for name, f in self._transformations: if name == "map": - element = f(element) + result = f(element) + if build_context := _current_build_context.value: + build_context.record_call(result, f, [element], {}) + element = result else: assert name == "filter" if not f(element): @@ -794,18 +795,17 @@ def one_of( return OneOfStrategy(args) -class MappedSearchStrategy(SearchStrategy[Ex]): +class MappedStrategy(SearchStrategy[Ex]): """A strategy which is defined purely by conversion to and from another strategy. Its parameter and distribution come from that other strategy. """ - def __init__(self, strategy, pack=None): + def __init__(self, strategy, pack): super().__init__() self.mapped_strategy = strategy - if pack is not None: - self.pack = pack + self.pack = pack def calc_is_empty(self, recur): return recur(self.mapped_strategy) @@ -821,11 +821,6 @@ def __repr__(self): def do_validate(self): self.mapped_strategy.validate() - def pack(self, x): - """Take a value produced by the underlying mapped_strategy and turn it - into a value suitable for outputting from this strategy.""" - raise NotImplementedError(f"{self.__class__.__name__}.pack()") - def do_draw(self, data: ConjectureData) -> Any: with warnings.catch_warnings(): if isinstance(self.pack, type) and issubclass( @@ -847,10 +842,67 @@ def do_draw(self, data: ConjectureData) -> Any: @property def branches(self) -> List[SearchStrategy[Ex]]: return [ - MappedSearchStrategy(pack=self.pack, strategy=strategy) + MappedStrategy(strategy, pack=self.pack) for strategy in self.mapped_strategy.branches ] + def filter(self, condition: Callable[[Ex], Any]) -> "SearchStrategy[Ex]": + # Includes a special case so that we can rewrite filters on collection + # lengths, when most collections are `st.lists(...).map(the_type)`. + ListStrategy = _list_strategy_type() + if not isinstance(self.mapped_strategy, ListStrategy) or not ( + (isinstance(self.pack, type) and issubclass(self.pack, abc.Collection)) + or self.pack in _collection_ish_functions() + ): + return super().filter(condition) + + # Check whether our inner list strategy can rewrite this filter condition. + # If not, discard the result and _only_ apply a new outer filter. + new = ListStrategy.filter(self.mapped_strategy, condition) + if getattr(new, "filtered_strategy", None) is self.mapped_strategy: + return super().filter(condition) # didn't rewrite + + # Apply a new outer filter even though we rewrote the inner strategy, + # because some collections can change the list length (dict, set, etc). + return FilteredStrategy(type(self)(new, self.pack), conditions=(condition,)) + + +@lru_cache +def _list_strategy_type(): + from hypothesis.strategies._internal.collections import ListStrategy + + return ListStrategy + + +def _collection_ish_functions(): + funcs = [sorted] + if np := sys.modules.get("numpy"): + # c.f. https://numpy.org/doc/stable/reference/routines.array-creation.html + # Probably only `np.array` and `np.asarray` will be used in practice, + # but why should that stop us when we've already gone this far? 
+ funcs += [ + np.empty_like, + np.eye, + np.identity, + np.ones_like, + np.zeros_like, + np.array, + np.asarray, + np.asanyarray, + np.ascontiguousarray, + np.asmatrix, + np.copy, + np.rec.array, + np.rec.fromarrays, + np.rec.fromrecords, + np.diag, + # bonus undocumented functions from tab-completion: + np.asarray_chkfinite, + np.asfarray, + np.asfortranarray, + ] + return funcs + filter_not_satisfied = UniqueIdentifier("filter not satisfied") diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py index 021ce3c6e6..8df955e632 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py @@ -11,14 +11,17 @@ import copy import re import warnings -from functools import lru_cache +from functools import lru_cache, partial from hypothesis.errors import HypothesisWarning, InvalidArgument from hypothesis.internal import charmap +from hypothesis.internal.filtering import max_len, min_len from hypothesis.internal.intervalsets import IntervalSet from hypothesis.strategies._internal.collections import ListStrategy from hypothesis.strategies._internal.lazy import unwrap_strategies +from hypothesis.strategies._internal.numbers import IntegersStrategy from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.vendor.pretty import pretty class OneCharStringStrategy(SearchStrategy): @@ -76,6 +79,33 @@ def do_draw(self, data): return data.draw_string(self.intervals, min_size=1, max_size=1) +_nonempty_names = ( + "capitalize", + "expandtabs", + "join", + "lower", + "rsplit", + "split", + "splitlines", + "swapcase", + "title", + "upper", +) +_nonempty_and_content_names = ( + "islower", + "isupper", + "isalnum", + "isalpha", + "isascii", + "isdigit", + "isspace", + "istitle", + "lstrip", + "rstrip", + "strip", +) + + class TextStrategy(ListStrategy): def do_draw(self, data): # if our element strategy is OneCharStringStrategy, we can skip the @@ -104,44 +134,17 @@ def __repr__(self): _nonempty_filters = ( *ListStrategy._nonempty_filters, str, - str.capitalize, str.casefold, str.encode, - str.expandtabs, - str.join, - str.lower, - str.rsplit, - str.split, - str.splitlines, - str.swapcase, - str.title, - str.upper, + *(getattr(str, n) for n in _nonempty_names), ) _nonempty_and_content_filters = ( - str.isidentifier, - str.islower, - str.isupper, - str.isalnum, - str.isalpha, - str.isascii, str.isdecimal, - str.isdigit, str.isnumeric, - str.isspace, - str.istitle, - str.lstrip, - str.rstrip, - str.strip, + *(getattr(str, n) for n in _nonempty_and_content_names), ) def filter(self, condition): - if condition in (str.lower, str.title, str.upper): - warnings.warn( - f"You applied str.{condition.__name__} as a filter, but this allows " - f"all nonempty strings! Did you mean str.is{condition.__name__}?", - HypothesisWarning, - stacklevel=2, - ) elems = unwrap_strategies(self.element_strategy) if ( condition is str.isidentifier @@ -163,17 +166,76 @@ def filter(self, condition): ), # Filter to ensure that NFKC normalization keeps working in future ).filter(str.isidentifier) + if (new := _string_filter_rewrite(self, str, condition)) is not None: + return new + return super().filter(condition) - # We use ListStrategy filter logic for the conditions that *only* imply - # the string is nonempty. Here, we increment the min_size but still apply - # the filter for conditions that imply nonempty *and specific contents*. 
-        if condition in self._nonempty_and_content_filters:
-            assert self.max_size >= 1, "Always-empty is special cased in st.text()"
-            self = copy.copy(self)
-            self.min_size = max(1, self.min_size)
-            return ListStrategy.filter(self, condition)
-        return super().filter(condition)
+
+def _string_filter_rewrite(self, kind, condition):
+    if condition in (kind.lower, kind.title, kind.upper):
+        k = kind.__name__
+        warnings.warn(
+            f"You applied {k}.{condition.__name__} as a filter, but this allows "
+            f"all nonempty strings! Did you mean {k}.is{condition.__name__}?",
+            HypothesisWarning,
+            stacklevel=2,
+        )
+
+    elems = unwrap_strategies(self.element_strategy)
+    if (
+        (kind is bytes or isinstance(elems, OneCharStringStrategy))
+        and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)
+        and isinstance(pattern.pattern, kind)
+    ):
+        from hypothesis.strategies._internal.regex import regex_strategy
+
+        if condition.__name__ == "match":
+            # Replace with an easier-to-handle equivalent condition
+            caret = "^" if kind is str else b"^"
+            pattern = re.compile(caret + pattern.pattern, flags=pattern.flags)
+            condition = pattern.search
+
+        if condition.__name__ in ("search", "findall", "fullmatch"):
+            s = regex_strategy(
+                pattern,
+                fullmatch=condition.__name__ == "fullmatch",
+                alphabet=self.element_strategy if kind is str else None,
+            )
+            if self.min_size > 0:
+                s = s.filter(partial(min_len, self.min_size))
+            if self.max_size < 1e999:
+                s = s.filter(partial(max_len, self.max_size))
+            return s
+        elif condition.__name__ in ("finditer", "scanner"):
+            # PyPy implements `finditer` as an alias to their `scanner` method
+            warnings.warn(
+                f"You applied {pretty(condition)} as a filter, but this allows "
+                f"any string at all! Did you mean .findall ?",
+                HypothesisWarning,
+                stacklevel=3,
+            )
+            return self
+        elif condition.__name__ == "split":
+            warnings.warn(
+                f"You applied {pretty(condition)} as a filter, but this allows "
+                f"any nonempty string! Did you mean .search ?",
+                HypothesisWarning,
+                stacklevel=3,
+            )
+            return self.filter(bool)
+
+    # We use ListStrategy filter logic for the conditions that *only* imply
+    # the string is nonempty. Here, we increment the min_size but still apply
+    # the filter for conditions that imply nonempty *and specific contents*.
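+    # For example (an illustrative sketch, assuming the default st.text() sizes):
+    # st.text().filter(str.istitle) effectively becomes
+    # st.text(min_size=1).filter(str.istitle), rather than rejection-sampling
+    # away the empty string.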
+ if condition in self._nonempty_and_content_filters and self.max_size >= 1: + self = copy.copy(self) + self.min_size = max(1, self.min_size) + return ListStrategy.filter(self, condition) + + return None # Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt @@ -229,9 +291,26 @@ def _identifier_characters(): return id_start, id_continue -class FixedSizeBytes(SearchStrategy): - def __init__(self, size): - self.size = size +class BytesStrategy(ListStrategy): + def __init__(self, min_size, max_size): + super().__init__(IntegersStrategy(0, 255), min_size=min_size, max_size=max_size) def do_draw(self, data): - return bytes(data.draw_bytes(self.size)) + # TODO: refactor the underlying provider to support variable-length bytes + if self.min_size == self.max_size: + return bytes(data.draw_bytes(self.min_size)) + return bytes(super().do_draw(data)) + + _nonempty_filters = ( + *ListStrategy._nonempty_filters, + bytes, + *(getattr(bytes, n) for n in _nonempty_names), + ) + _nonempty_and_content_filters = ( + *(getattr(bytes, n) for n in _nonempty_and_content_names), + ) + + def filter(self, condition): + if (new := _string_filter_rewrite(self, bytes, condition)) is not None: + return new + return super().filter(condition) diff --git a/hypothesis-python/src/hypothesis/vendor/pretty.py b/hypothesis-python/src/hypothesis/vendor/pretty.py index 35451b9961..ceffe3a6aa 100644 --- a/hypothesis-python/src/hypothesis/vendor/pretty.py +++ b/hypothesis-python/src/hypothesis/vendor/pretty.py @@ -747,7 +747,7 @@ def _repr_float_counting_nans(obj, p, cycle): type: _type_pprint, types.FunctionType: _function_pprint, types.BuiltinFunctionType: _function_pprint, - types.MethodType: _repr_pprint, + types.MethodType: _function_pprint, datetime.datetime: _repr_pprint, datetime.timedelta: _repr_pprint, BaseException: _exception_pprint, diff --git a/hypothesis-python/src/hypothesis/version.py b/hypothesis-python/src/hypothesis/version.py index da7f74708c..22bdf94369 100644 --- a/hypothesis-python/src/hypothesis/version.py +++ b/hypothesis-python/src/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 98, 11) +__version_info__ = (6, 98, 17) __version__ = ".".join(map(str, __version_info__)) diff --git a/hypothesis-python/tests/array_api/README.md b/hypothesis-python/tests/array_api/README.md index bc2f4d1aa2..f841269d7f 100644 --- a/hypothesis-python/tests/array_api/README.md +++ b/hypothesis-python/tests/array_api/README.md @@ -3,7 +3,7 @@ This folder contains tests for `hypothesis.extra.array_api`. ## Mocked array module A mock of the Array API namespace exists as `mock_xp` in `extra.array_api`. This -wraps NumPy-proper to conform it to the *draft* spec, where `numpy.array_api` +wraps NumPy-proper to conform it to the *draft* spec, where `array_api_strict` might not. This is not a fully compliant wrapper, but conforms enough for the purposes of testing. @@ -21,7 +21,7 @@ If neither of these, the test suite will then try resolve the variable like so: 1. If the variable matches a name of an available entry point, load said entry point. 2. If the variables matches a valid import path, import said path. 
-For example, to specify NumPy's Array API implementation, you could use its +For example, to specify NumPy's Array API implementation[^1], you could use its entry point (**1.**), HYPOTHESIS_TEST_ARRAY_API=numpy pytest tests/array_api @@ -48,3 +48,6 @@ Otherwise the test suite will use the variable as the `api_version` argument for In the future we intend to support running tests against multiple API versioned namespaces, likely with an additional recognized option that infers all supported versions. + +[^1]: Note NumPy will likely remove `numpy.array_api` in the future ([NEP 56](https://github.com/numpy/numpy/pull/25542)) +in favour of the third-party [`array-api-strict`](https://github.com/data-apis/array-api-strict) library. \ No newline at end of file diff --git a/hypothesis-python/tests/conjecture/common.py b/hypothesis-python/tests/conjecture/common.py index 7563cade20..9110d2d79b 100644 --- a/hypothesis-python/tests/conjecture/common.py +++ b/hypothesis-python/tests/conjecture/common.py @@ -10,9 +10,10 @@ import math from contextlib import contextmanager -from random import Random from hypothesis import HealthCheck, assume, settings, strategies as st +from hypothesis.control import current_build_context +from hypothesis.errors import InvalidArgument from hypothesis.internal.conjecture import engine as engine_module from hypothesis.internal.conjecture.data import ConjectureData, Status from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner @@ -75,8 +76,30 @@ def accept(f): return accept -def fresh_data(*, observer=None) -> ConjectureData: - return ConjectureData(BUFFER_SIZE, prefix=b"", random=Random(), observer=observer) +def fresh_data(*, random=None, observer=None) -> ConjectureData: + if random is None: + try: + context = current_build_context() + except InvalidArgument: + # ensure usage of fresh_data() is not flaky outside of property tests. + raise ValueError( + "must pass a seeded Random instance to fresh_data() when " + "outside of a build context" + ) from None + + # within property tests, ensure fresh_data uses a controlled source of + # randomness. + # drawing this from the current build context is almost *too* magical. But + # the alternative is an extra @given(st.randoms()) everywhere we use + # fresh_data, so eh. 
+ random = context.data.draw(st.randoms()) + + return ConjectureData( + BUFFER_SIZE, + prefix=b"", + random=random, + observer=observer, + ) @st.composite @@ -226,3 +249,14 @@ def draw_boolean_kwargs(draw, *, use_forced=False): assume(bits <= 64) return {"p": p, "forced": forced} + + +def ir_types_and_kwargs(): + options = [ + ("boolean", draw_boolean_kwargs()), + ("integer", draw_integer_kwargs()), + ("float", draw_float_kwargs()), + ("bytes", draw_bytes_kwargs()), + ("string", draw_string_kwargs()), + ] + return st.one_of(st.tuples(st.just(name), kws) for name, kws in options) diff --git a/hypothesis-python/tests/conjecture/test_forced.py b/hypothesis-python/tests/conjecture/test_forced.py index 8902b2fc8a..97994434d8 100644 --- a/hypothesis-python/tests/conjecture/test_forced.py +++ b/hypothesis-python/tests/conjecture/test_forced.py @@ -205,3 +205,23 @@ def test(kwargs): assert float_to_lex(abs(drawn)) == float_to_lex(abs(forced)) test() + + +@pytest.mark.parametrize("sign", [1, -1]) +@pytest.mark.parametrize( + "min_value, max_value", + [ + (0.0, 0.0), + (-0.0, -0.0), + (0.0, 100.0), + (-100.0, -0.0), + (5.0, 10.0), + (-10.0, -5.0), + ], +) +@given(random=st.randoms()) +def test_forced_floats_with_nan(random, sign, min_value, max_value): + # nans with a sign opposite of both bounds previously gave us trouble + # trying to use float clampers that didn't exist when drawing. + data = fresh_data(random=random) + data.draw_float(min_value=min_value, max_value=max_value, forced=sign * math.nan) diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_ir.py index 4f2651465a..ee3edcf7f2 100644 --- a/hypothesis-python/tests/conjecture/test_ir.py +++ b/hypothesis-python/tests/conjecture/test_ir.py @@ -9,37 +9,16 @@ # obtain one at https://mozilla.org/MPL/2.0/. 
from hypothesis import assume, example, given, strategies as st +from hypothesis.internal.conjecture.data import IRNode from hypothesis.internal.conjecture.datatree import ( MAX_CHILDREN_EFFECTIVELY_INFINITE, all_children, compute_max_children, ) -from hypothesis.internal.floats import next_down, next_up +from hypothesis.internal.floats import SMALLEST_SUBNORMAL, next_down, next_up from hypothesis.internal.intervalsets import IntervalSet -from tests.conjecture.common import ( - draw_boolean_kwargs, - draw_bytes_kwargs, - draw_float_kwargs, - draw_integer_kwargs, - draw_string_kwargs, - fresh_data, -) - - -@st.composite -def ir_types_and_kwargs(draw): - ir_type = draw(st.sampled_from(["integer", "bytes", "float", "string", "boolean"])) - kwargs_strategy = { - "integer": draw_integer_kwargs(), - "bytes": draw_bytes_kwargs(), - "float": draw_float_kwargs(), - "string": draw_string_kwargs(), - "boolean": draw_boolean_kwargs(), - }[ir_type] - kwargs = draw(kwargs_strategy) - - return (ir_type, kwargs) +from tests.conjecture.common import fresh_data, ir_types_and_kwargs # we max out at 128 bit integers in the *unbounded* case, but someone may @@ -154,3 +133,66 @@ def test_compute_max_children_and_all_children_agree(ir_type_and_kwargs): cap = min(100_000, MAX_CHILDREN_EFFECTIVELY_INFINITE) assume(max_children < cap) assert len(list(all_children(ir_type, kwargs))) == max_children + + +@given(st.randoms()) +def test_ir_nodes(random): + data = fresh_data(random=random) + data.draw_float(min_value=-10.0, max_value=10.0, forced=5.0) + data.draw_boolean(forced=True) + + data.start_example(42) + data.draw_string(IntervalSet.from_string("abcd"), forced="abbcccdddd") + data.draw_bytes(8, forced=bytes(8)) + data.stop_example() + + data.draw_integer(0, 100, forced=50) + + data.freeze() + expected_tree_nodes = [ + IRNode( + ir_type="float", + value=5.0, + kwargs={ + "min_value": -10.0, + "max_value": 10.0, + "allow_nan": True, + "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL, + }, + was_forced=True, + ), + IRNode( + ir_type="boolean", + value=True, + kwargs={"p": 0.5}, + was_forced=True, + ), + IRNode( + ir_type="string", + value="abbcccdddd", + kwargs={ + "intervals": IntervalSet.from_string("abcd"), + "min_size": 0, + "max_size": None, + }, + was_forced=True, + ), + IRNode( + ir_type="bytes", + value=bytes(8), + kwargs={"size": 8}, + was_forced=True, + ), + IRNode( + ir_type="integer", + value=50, + kwargs={ + "min_value": 0, + "max_value": 100, + "weights": None, + "shrink_towards": 0, + }, + was_forced=True, + ), + ] + assert data.examples.ir_tree_nodes == expected_tree_nodes diff --git a/hypothesis-python/tests/conjecture/test_test_data.py b/hypothesis-python/tests/conjecture/test_test_data.py index 5186fc632f..abdc217baf 100644 --- a/hypothesis-python/tests/conjecture/test_test_data.py +++ b/hypothesis-python/tests/conjecture/test_test_data.py @@ -347,7 +347,7 @@ def test_can_override_label(): def test_will_mark_too_deep_examples_as_invalid(): d = ConjectureData.for_buffer(bytes(0)) - s = st.none() + s = st.integers() for _ in range(MAX_DEPTH + 1): s = s.map(lambda x: None) diff --git a/hypothesis-python/tests/cover/test_compat.py b/hypothesis-python/tests/cover/test_compat.py index 7942a95135..b4bf703562 100644 --- a/hypothesis-python/tests/cover/test_compat.py +++ b/hypothesis-python/tests/cover/test_compat.py @@ -18,6 +18,7 @@ import pytest from hypothesis.internal.compat import ( + add_note, ceil, dataclass_asdict, extract_bits, @@ -143,3 +144,12 @@ def test_extract_bits_roundtrip(width, x): if 
width is not None: assert len(bits) == width assert x == sum(v << p for p, v in enumerate(reversed(bits))) + + +@dataclass(frozen=True) +class ImmutableError: + msg: str + + +def test_add_note_fails_gracefully_on_frozen_instance(): + add_note(ImmutableError("msg"), "some note") diff --git a/hypothesis-python/tests/cover/test_custom_reprs.py b/hypothesis-python/tests/cover/test_custom_reprs.py index 384044ed65..9da453d2a3 100644 --- a/hypothesis-python/tests/cover/test_custom_reprs.py +++ b/hypothesis-python/tests/cover/test_custom_reprs.py @@ -79,7 +79,7 @@ class Bar(Foo): def test_reprs_as_created(): @given(foo=st.builds(Foo), bar=st.from_type(Bar), baz=st.none().map(Foo)) - @settings(print_blob=False, max_examples=10_000) + @settings(print_blob=False, max_examples=10_000, derandomize=True) def inner(foo, bar, baz): assert baz.x is None assert foo.x <= 0 or bar.x >= 0 diff --git a/hypothesis-python/tests/cover/test_filter_rewriting.py b/hypothesis-python/tests/cover/test_filter_rewriting.py index d6b36d43ce..ab4ec78f19 100644 --- a/hypothesis-python/tests/cover/test_filter_rewriting.py +++ b/hypothesis-python/tests/cover/test_filter_rewriting.py @@ -11,13 +11,14 @@ import decimal import math import operator +import re from fractions import Fraction from functools import partial from sys import float_info import pytest -from hypothesis import given, strategies as st +from hypothesis import HealthCheck, given, settings, strategies as st from hypothesis.errors import HypothesisWarning, Unsatisfiable from hypothesis.internal.filtering import max_len, min_len from hypothesis.internal.floats import next_down, next_up @@ -25,12 +26,14 @@ from hypothesis.strategies._internal.core import data from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies from hypothesis.strategies._internal.numbers import FloatStrategy, IntegersStrategy -from hypothesis.strategies._internal.strategies import FilteredStrategy +from hypothesis.strategies._internal.strategies import FilteredStrategy, MappedStrategy from hypothesis.strategies._internal.strings import TextStrategy from tests.common.debug import check_can_generate_examples from tests.common.utils import fails_with +A_FEW = 15 # speed up massively-parametrized tests + @pytest.mark.parametrize( "strategy, predicate, start, end", @@ -84,6 +87,7 @@ ], ids=get_pretty_function_description, ) +@settings(max_examples=A_FEW) @given(data=st.data()) def test_filter_rewriting_ints(data, strategy, predicate, start, end): s = strategy.filter(predicate) @@ -147,6 +151,7 @@ def test_filter_rewriting_ints(data, strategy, predicate, start, end): ], ids=get_pretty_function_description, ) +@settings(max_examples=A_FEW) @given(data=st.data()) def test_filter_rewriting_floats(data, strategy, predicate, min_value, max_value): s = strategy.filter(predicate) @@ -257,6 +262,7 @@ def mod2(x): ] ), ) +@settings(suppress_health_check=[HealthCheck.too_slow]) def test_rewrite_filter_chains_with_some_unhandled(data, predicates, s): # Set up our strategy for p in predicates: @@ -405,6 +411,7 @@ def test_filter_floats_can_skip_subnormals(op, attr, value, expected): ], ids=get_pretty_function_description, ) +@settings(max_examples=A_FEW) @given(data=st.data()) def test_filter_rewriting_text_partial_len(data, strategy, predicate, start, end): s = strategy.filter(predicate) @@ -469,30 +476,44 @@ def test_can_rewrite_multiple_length_filters_if_not_lambdas(data): st.lists(st.integers()), st.lists(st.integers(), unique=True), st.lists(st.sampled_from([1, 2, 3])), - # TODO: 
support more collection types. Might require messing around with - # strategy internals, e.g. in MappedStrategy/FilteredStrategy. - # st.binary(), - # st.binary.map(bytearray), - # st.sets(st.integers()), - # st.dictionaries(st.integers(), st.none()), + st.binary(), + st.sets(st.integers()), + st.frozensets(st.integers()), + st.dictionaries(st.integers(), st.none()), + st.lists(st.integers(), unique_by=lambda x: x % 17).map(tuple), ], ids=get_pretty_function_description, ) +@settings(max_examples=A_FEW) @given(data=st.data()) def test_filter_rewriting_text_lambda_len(data, strategy, predicate, start, end): s = strategy.filter(predicate) + unwrapped_nofilter = unwrap_strategies(strategy) unwrapped = unwrap_strategies(s) - assert isinstance(unwrapped, FilteredStrategy) - assert isinstance(unwrapped.filtered_strategy, type(unwrap_strategies(strategy))) + + if was_mapped := isinstance(unwrapped, MappedStrategy): + unwrapped = unwrapped.mapped_strategy + + assert isinstance(unwrapped, FilteredStrategy), f"{unwrapped=} {type(unwrapped)=}" + assert isinstance( + unwrapped.filtered_strategy, + type(unwrapped_nofilter.mapped_strategy if was_mapped else unwrapped_nofilter), + ) for pred in unwrapped.flat_conditions: assert pred.__name__ == "" + if isinstance(unwrapped.filtered_strategy, MappedStrategy): + unwrapped = unwrapped.filtered_strategy.mapped_strategy + assert unwrapped.filtered_strategy.min_size == start assert unwrapped.filtered_strategy.max_size == end value = data.draw(s) assert predicate(value) +two = 2 + + @pytest.mark.parametrize( "predicate, start, end", [ @@ -514,6 +535,9 @@ def test_filter_rewriting_text_lambda_len(data, strategy, predicate, start, end) (lambda x: len(x) < 1 and len(x) < 1, 0, 0), (lambda x: len(x) > 1 and len(x) > 0, 2, 3), # input max element_count=3 (lambda x: len(x) < 1 and len(x) < 2, 0, 0), + # Comparisons involving one literal and one variable + (lambda x: 1 <= len(x) <= two, 1, 3), + (lambda x: two <= len(x) <= 4, 0, 3), ], ids=get_pretty_function_description, ) @@ -524,8 +548,9 @@ def test_filter_rewriting_text_lambda_len(data, strategy, predicate, start, end) ], ids=get_pretty_function_description, ) +@settings(max_examples=A_FEW) @given(data=st.data()) -def test_filter_rewriting_text_lambda_len_unique_elements( +def test_filter_rewriting_lambda_len_unique_elements( data, strategy, predicate, start, end ): s = strategy.filter(predicate) @@ -539,3 +564,56 @@ def test_filter_rewriting_text_lambda_len_unique_elements( assert unwrapped.filtered_strategy.max_size == end value = data.draw(s) assert predicate(value) + + +@pytest.mark.parametrize( + "predicate", + [ + (lambda x: len(x) < 3), + (lambda x: len(x) > 5), + ], + ids=get_pretty_function_description, +) +def test_does_not_rewrite_unsatisfiable_len_filter(predicate): + strategy = st.lists(st.none(), min_size=4, max_size=4).filter(predicate) + with pytest.raises(Unsatisfiable): + check_can_generate_examples(strategy) + # Rewriting to nothing() would correctly express the constraint. However + # we don't want _only rewritable strategies_ to work in e.g. 
one_of, so: + assert not strategy.is_empty + + +@pytest.mark.parametrize( + "method", ["match", "search", "findall", "fullmatch", "finditer", "split"] +) +@pytest.mark.parametrize( + "strategy, pattern", + [ + (st.text(), "ab+c"), + (st.text(alphabet="abcdef"), "ab+c"), + (st.text(min_size=5, max_size=10), "ab+c"), + (st.binary(), b"ab+c"), + (st.binary(min_size=5, max_size=10), b"ab+c"), + ], + ids=repr, +) +@settings(max_examples=A_FEW) +@given(data=st.data()) +def test_regex_filter_rewriting(data, strategy, pattern, method): + # This would raise a HealthCheck without rewriting, so checking that + # we can draw a valid value is sufficient. + predicate = getattr(re.compile(pattern), method) + s = strategy.filter(predicate) + if method in ("finditer", "split"): + msg = r"You applied re.compile\(.+?\).\w+ as a filter, but this allows" + with pytest.warns(HypothesisWarning, match=msg): + value = data.draw(s) + else: + value = data.draw(s) + assert predicate(value) + + +@fails_with(TypeError) +@given(st.text().filter(re.compile("abc").sub)) +def test_error_on_method_which_requires_multiple_args(_): + pass diff --git a/hypothesis-python/tests/cover/test_health_checks.py b/hypothesis-python/tests/cover/test_health_checks.py index ae55f3a700..5cc9217d13 100644 --- a/hypothesis-python/tests/cover/test_health_checks.py +++ b/hypothesis-python/tests/cover/test_health_checks.py @@ -18,6 +18,7 @@ from hypothesis.errors import FailedHealthCheck, InvalidArgument from hypothesis.internal.compat import int_from_bytes from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.engine import BUFFER_SIZE from hypothesis.internal.entropy import deterministic_PRNG from hypothesis.stateful import ( RuleBasedStateMachine, @@ -121,8 +122,12 @@ def test(x): assert "filter" in e.value.args[0] +large_strategy = st.binary(min_size=7000, max_size=7000) +too_large_strategy = st.tuples(large_strategy, large_strategy) + + def test_large_data_will_fail_a_health_check(): - @given(st.none() | st.binary(min_size=10**5, max_size=10**5)) + @given(st.none() | too_large_strategy) @settings(database=None) def test(x): pass @@ -160,7 +165,7 @@ def a(x): def test_large_base_example_fails_health_check(): - @given(st.binary(min_size=7000, max_size=7000)) + @given(large_strategy) def test(b): pass @@ -171,7 +176,7 @@ def test(b): def test_example_that_shrinks_to_overrun_fails_health_check(): - @given(st.binary(min_size=9000, max_size=9000) | st.none()) + @given(too_large_strategy | st.none()) def test(b): pass @@ -260,7 +265,9 @@ def test_does_not_trigger_health_check_when_most_examples_are_small(monkeypatch) @settings(database=None, max_examples=11, phases=[Phase.generate]) @given( st.integers(0, 100).flatmap( - lambda n: st.binary(min_size=n * 100, max_size=n * 100) + lambda n: st.binary( + min_size=min(n * 100, BUFFER_SIZE), max_size=n * 100 + ) ) ) def test(b): diff --git a/hypothesis-python/tests/cover/test_monitoring.py b/hypothesis-python/tests/cover/test_monitoring.py new file mode 100644 index 0000000000..59dabd69ff --- /dev/null +++ b/hypothesis-python/tests/cover/test_monitoring.py @@ -0,0 +1,41 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. 
If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import sys +from contextlib import contextmanager + +import pytest + +from hypothesis import given, strategies as st +from hypothesis.errors import HypothesisWarning +from hypothesis.internal.scrutineer import MONITORING_TOOL_ID + + +@contextmanager +def using_tool_id(tool_id, tool_name): + try: + sys.monitoring.use_tool_id(tool_id, tool_name) + yield + finally: + sys.monitoring.free_tool_id(tool_id) + + +@pytest.mark.skipif(sys.version_info[:2] < (3, 12), reason="new namespace") +def test_monitoring_warns_on_registered_tool_id(): + + # scrutineer can't run if something has already registered its tool id. + with using_tool_id(MONITORING_TOOL_ID, "rogue"): + with pytest.warns(HypothesisWarning, match=r"is already taken by tool rogue"): + + @given(st.integers()) + def f(n): + assert True + + f() diff --git a/hypothesis-python/tests/cover/test_stateful.py b/hypothesis-python/tests/cover/test_stateful.py index 84b9edcc59..d9c2e09efb 100644 --- a/hypothesis-python/tests/cover/test_stateful.py +++ b/hypothesis-python/tests/cover/test_stateful.py @@ -225,12 +225,12 @@ def fail_fast(self): assignment_line = err.value.__notes__[2] # 'populate_bundle()' returns 2 values, so should be # expanded to 2 variables. - assert assignment_line == "v1, v2 = state.populate_bundle()" + assert assignment_line == "b_0, b_1 = state.populate_bundle()" # Make sure MultipleResult is iterable so the printed code is valid. # See https://github.com/HypothesisWorks/hypothesis/issues/2311 state = ProducesMultiple() - v1, v2 = state.populate_bundle() + b_0, b_1 = state.populate_bundle() with raises(AssertionError): state.fail_fast() @@ -252,7 +252,7 @@ def fail_fast(self, b): run_state_machine_as_test(ProducesMultiple) assignment_line = err.value.__notes__[2] - assert assignment_line == "(v1,) = state.populate_bundle()" + assert assignment_line == "(b_0,) = state.populate_bundle()" state = ProducesMultiple() (v1,) = state.populate_bundle() @@ -797,9 +797,9 @@ def fail(self, source): result = "\n".join(err.value.__notes__) for m in ["create", "transfer", "fail"]: assert result.count("state." 
+ m) == 1 - assert "v1 = state.create()" in result - assert "v2 = state.transfer(source=v1)" in result - assert "state.fail(source=v2)" in result + assert "b1_0 = state.create()" in result + assert "b2_0 = state.transfer(source=b1_0)" in result + assert "state.fail(source=b2_0)" in result def test_initialize_rule(): @@ -845,7 +845,7 @@ class WithInitializeBundleRules(RuleBasedStateMachine): @initialize(target=a, dep=just("dep")) def initialize_a(self, dep): - return f"a v1 with ({dep})" + return f"a a_0 with ({dep})" @rule(param=a) def fail_fast(self, param): @@ -861,8 +861,8 @@ def fail_fast(self, param): == """ Falsifying example: state = WithInitializeBundleRules() -v1 = state.initialize_a(dep='dep') -state.fail_fast(param=v1) +a_0 = state.initialize_a(dep='dep') +state.fail_fast(param=a_0) state.teardown() """.strip() ) @@ -1087,8 +1087,8 @@ def mostly_fails(self, d): with pytest.raises(AssertionError) as err: run_state_machine_as_test(TrickyPrintingMachine) - assert "v1 = state.init_data(value=0)" in err.value.__notes__ - assert "v1 = state.init_data(value=v1)" not in err.value.__notes__ + assert "data_0 = state.init_data(value=0)" in err.value.__notes__ + assert "data_0 = state.init_data(value=data_0)" not in err.value.__notes__ class TrickyInitMachine(RuleBasedStateMachine): @@ -1182,3 +1182,109 @@ def test_fails_on_settings_class_attribute(): match="Assigning .+ as a class attribute does nothing", ): run_state_machine_as_test(ErrorsOnClassAttributeSettings) + + +def test_single_target_multiple(): + class Machine(RuleBasedStateMachine): + a = Bundle("a") + + @initialize(target=a) + def initialize(self): + return multiple("ret1", "ret2", "ret3") + + @rule(param=a) + def fail_fast(self, param): + raise AssertionError + + Machine.TestCase.settings = NO_BLOB_SETTINGS + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(Machine) + + result = "\n".join(err.value.__notes__) + assert ( + result + == """ +Falsifying example: +state = Machine() +a_0, a_1, a_2 = state.initialize() +state.fail_fast(param=a_2) +state.teardown() +""".strip() + ) + + +def test_multiple_targets(): + class Machine(RuleBasedStateMachine): + a = Bundle("a") + b = Bundle("b") + + @initialize(targets=(a, b)) + def initialize(self): + return multiple("ret1", "ret2", "ret3") + + @rule( + a1=consumes(a), + a2=consumes(a), + a3=consumes(a), + b1=consumes(b), + b2=consumes(b), + b3=consumes(b), + ) + def fail_fast(self, a1, a2, a3, b1, b2, b3): + raise AssertionError + + Machine.TestCase.settings = NO_BLOB_SETTINGS + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(Machine) + + result = "\n".join(err.value.__notes__) + assert ( + result + == """ +Falsifying example: +state = Machine() +a_0, b_0, a_1, b_1, a_2, b_2 = state.initialize() +state.fail_fast(a1=a_2, a2=a_1, a3=a_0, b1=b_2, b2=b_1, b3=b_0) +state.teardown() +""".strip() + ) + + +def test_multiple_common_targets(): + class Machine(RuleBasedStateMachine): + a = Bundle("a") + b = Bundle("b") + + @initialize(targets=(a, b, a)) + def initialize(self): + return multiple("ret1", "ret2", "ret3") + + @rule( + a1=consumes(a), + a2=consumes(a), + a3=consumes(a), + a4=consumes(a), + a5=consumes(a), + a6=consumes(a), + b1=consumes(b), + b2=consumes(b), + b3=consumes(b), + ) + def fail_fast(self, a1, a2, a3, a4, a5, a6, b1, b2, b3): + raise AssertionError + + Machine.TestCase.settings = NO_BLOB_SETTINGS + with pytest.raises(AssertionError) as err: + run_state_machine_as_test(Machine) + + result = "\n".join(err.value.__notes__) + assert ( 
+ result + == """ +Falsifying example: +state = Machine() +a_0, b_0, a_1, a_2, b_1, a_3, a_4, b_2, a_5 = state.initialize() +state.fail_fast(a1=a_5, a2=a_4, a3=a_3, a4=a_2, a5=a_1, a6=a_0, b1=b_2, b2=b_1, b3=b_0) +state.teardown() +""".strip() + ) diff --git a/hypothesis-python/tests/pandas/test_indexes.py b/hypothesis-python/tests/pandas/test_indexes.py index 465730d565..5680e4ad93 100644 --- a/hypothesis-python/tests/pandas/test_indexes.py +++ b/hypothesis-python/tests/pandas/test_indexes.py @@ -25,7 +25,7 @@ # https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#index-can-now-hold-numpy-numeric-dtypes @given(pdst.indexes(dtype=int, max_size=0)) def test_gets_right_dtype_for_empty_indices(ix): - is_32bit = sys.maxsize == 2**32 - 1 + is_32bit = sys.maxsize == 2**31 - 1 pandas2 = pandas.__version__.startswith("2.") windows = sys.platform == "win32" # including 64-bit windows, confusingly if pandas2 and (is_32bit or windows): diff --git a/requirements/coverage.txt b/requirements/coverage.txt index 18c7c29351..2cb320a8ca 100644 --- a/requirements/coverage.txt +++ b/requirements/coverage.txt @@ -16,7 +16,7 @@ click==8.1.7 # via # -r requirements/coverage.in # black -coverage==7.4.1 +coverage==7.4.3 # via -r requirements/coverage.in dpcontracts==0.6.0 # via -r requirements/coverage.in @@ -32,7 +32,7 @@ iniconfig==2.0.0 # via pytest lark==1.1.9 # via -r requirements/coverage.in -libcst==1.1.0 +libcst==1.2.0 # via -r requirements/coverage.in mypy-extensions==1.0.0 # via @@ -47,7 +47,7 @@ packaging==23.2 # via # black # pytest -pandas==2.2.0 +pandas==2.2.1 # via -r requirements/coverage.in pathspec==0.12.1 # via black @@ -61,13 +61,13 @@ ptyprocess==0.7.0 # via pexpect pyarrow==15.0.0 # via -r requirements/coverage.in -pytest==8.0.1 +pytest==8.1.0 # via # -r requirements/test.in # pytest-xdist pytest-xdist==3.5.0 # via -r requirements/test.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # -r requirements/coverage.in # pandas @@ -77,7 +77,7 @@ pytz==2024.1 # pandas pyyaml==6.0.1 # via libcst -redis==5.0.1 +redis==5.0.2 # via fakeredis six==1.16.0 # via python-dateutil @@ -89,7 +89,7 @@ tomli==2.0.1 # via # black # pytest -typing-extensions==4.9.0 +typing-extensions==4.10.0 # via # -r requirements/coverage.in # black diff --git a/requirements/fuzzing.txt b/requirements/fuzzing.txt index 466b1ae692..3adb4171c9 100644 --- a/requirements/fuzzing.txt +++ b/requirements/fuzzing.txt @@ -29,11 +29,11 @@ click==8.1.7 # black # flask # hypothesis -coverage==7.4.1 +coverage==7.4.3 # via # -r requirements/coverage.in # hypofuzz -dash==2.15.0 +dash==2.16.0 # via hypofuzz dash-core-components==2.0.0 # via dash @@ -54,12 +54,10 @@ fakeredis==2.21.1 # via -r requirements/coverage.in flask==3.0.2 # via dash -hypofuzz==24.2.2 +hypofuzz==24.2.3 # via -r requirements/fuzzing.in -hypothesis[cli]==6.98.6 - # via - # hypofuzz - # hypothesis +hypothesis[cli]==6.98.15 + # via hypofuzz idna==3.6 # via requests importlib-metadata==7.0.1 @@ -72,7 +70,7 @@ jinja2==3.1.3 # via flask lark==1.1.9 # via -r requirements/coverage.in -libcst==1.1.0 +libcst==1.2.0 # via # -r requirements/coverage.in # hypofuzz @@ -100,7 +98,7 @@ packaging==23.2 # black # plotly # pytest -pandas==2.2.0 +pandas==2.2.1 # via # -r requirements/coverage.in # hypofuzz @@ -122,14 +120,14 @@ pyarrow==15.0.0 # via -r requirements/coverage.in pygments==2.17.2 # via rich -pytest==8.0.1 +pytest==8.1.0 # via # -r requirements/test.in # hypofuzz # pytest-xdist pytest-xdist==3.5.0 # via -r requirements/test.in -python-dateutil==2.8.2 
+python-dateutil==2.9.0.post0 # via # -r requirements/coverage.in # pandas @@ -139,7 +137,7 @@ pytz==2024.1 # pandas pyyaml==6.0.1 # via libcst -redis==5.0.1 +redis==5.0.2 # via fakeredis requests==2.31.0 # via @@ -147,7 +145,7 @@ requests==2.31.0 # hypofuzz retrying==1.3.4 # via dash -rich==13.7.0 +rich==13.7.1 # via hypothesis six==1.16.0 # via @@ -164,7 +162,7 @@ tomli==2.0.1 # via # black # pytest -typing-extensions==4.9.0 +typing-extensions==4.10.0 # via # -r requirements/coverage.in # black @@ -175,7 +173,7 @@ typing-inspect==0.9.0 # via libcst tzdata==2024.1 # via pandas -urllib3==2.2.0 +urllib3==2.2.1 # via requests werkzeug==3.0.1 # via @@ -185,5 +183,5 @@ zipp==3.17.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==69.1.0 +setuptools==69.1.1 # via dash diff --git a/requirements/test.txt b/requirements/test.txt index 0a70d91df5..3e743c0ad5 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -22,7 +22,7 @@ pluggy==1.4.0 # via pytest ptyprocess==0.7.0 # via pexpect -pytest==8.0.1 +pytest==8.1.0 # via # -r requirements/test.in # pytest-xdist diff --git a/requirements/tools.in b/requirements/tools.in index fc29d543a7..4359bac1ad 100644 --- a/requirements/tools.in +++ b/requirements/tools.in @@ -6,6 +6,7 @@ ipython lark libcst mypy +numpy pelican[markdown] pip-tools pyright diff --git a/requirements/tools.txt b/requirements/tools.txt index dfae6b4903..fc2642633a 100644 --- a/requirements/tools.txt +++ b/requirements/tools.txt @@ -6,7 +6,7 @@ # alabaster==0.7.16 # via sphinx -anyio==4.2.0 +anyio==4.3.0 # via watchfiles asgiref==3.7.2 # via django @@ -14,7 +14,7 @@ asttokens==2.4.1 # via stack-data attrs==23.2.0 # via hypothesis (hypothesis-python/setup.py) -autoflake==2.2.1 +autoflake==2.3.0 # via shed babel==2.14.0 # via sphinx @@ -24,9 +24,9 @@ black==24.2.0 # via shed blinker==1.7.0 # via pelican -build==1.0.3 +build==1.1.1 # via pip-tools -cachetools==5.3.2 +cachetools==5.3.3 # via tox certifi==2024.2.2 # via requests @@ -46,9 +46,9 @@ colorama==0.4.6 # via tox com2ann==0.3.0 # via shed -coverage==7.4.1 +coverage==7.4.3 # via -r requirements/tools.in -cryptography==42.0.3 +cryptography==42.0.5 # via # secretstorage # types-pyopenssl @@ -95,7 +95,7 @@ importlib-metadata==7.0.1 # twine iniconfig==2.0.0 # via pytest -ipython==8.21.0 +ipython==8.22.1 # via -r requirements/tools.in isort==5.13.2 # via shed @@ -113,11 +113,11 @@ jinja2==3.1.3 # sphinx jsonpointer==2.4 # via sphinx-jsonschema -keyring==24.3.0 +keyring==24.3.1 # via twine lark==1.1.9 # via -r requirements/tools.in -libcst==1.1.0 +libcst==1.2.0 # via # -r requirements/tools.in # shed @@ -144,6 +144,8 @@ nh3==0.2.15 # via readme-renderer nodeenv==1.8.0 # via pyright +numpy==1.26.4 + # via -r requirements/tools.in ordered-set==4.1.0 # via pelican packaging==23.2 @@ -164,7 +166,7 @@ pexpect==4.9.0 # via ipython pip-tools==7.4.0 # via -r requirements/tools.in -pkginfo==1.9.6 +pkginfo==1.10.0 # via twine platformdirs==4.2.0 # via @@ -198,23 +200,23 @@ pyproject-hooks==1.0.0 # via # build # pip-tools -pyright==1.1.350 +pyright==1.1.352 # via -r requirements/tools.in -pytest==8.0.1 +pytest==8.1.0 # via -r requirements/tools.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # -r requirements/tools.in # pelican pytz==2024.1 # via feedgenerator -pyupgrade==3.15.0 +pyupgrade==3.15.1 # via shed pyyaml==6.0.1 # via # libcst # sphinx-jsonschema -readme-renderer==42.0 +readme-renderer==43.0 # via twine requests==2.31.0 # via @@ -229,11 +231,11 @@ 
restructuredtext-lint==1.4.0 # via -r requirements/tools.in rfc3986==2.0.0 # via twine -rich==13.7.0 +rich==13.7.1 # via # pelican # twine -ruff==0.2.2 +ruff==0.3.0 # via -r requirements/tools.in secretstorage==3.3.3 # via keyring @@ -243,7 +245,7 @@ six==1.16.0 # via # asttokens # python-dateutil -sniffio==1.3.0 +sniffio==1.3.1 # via anyio snowballstemmer==2.2.0 # via sphinx @@ -313,13 +315,13 @@ types-click==7.1.8 # via -r requirements/tools.in types-pkg-resources==0.1.3 # via -r requirements/tools.in -types-pyopenssl==24.0.0.20240130 +types-pyopenssl==24.0.0.20240228 # via types-redis types-pytz==2024.1.0.20240203 # via -r requirements/tools.in -types-redis==4.6.0.20240106 +types-redis==4.6.0.20240218 # via -r requirements/tools.in -typing-extensions==4.9.0 +typing-extensions==4.10.0 # via # -r requirements/tools.in # anyio @@ -332,11 +334,11 @@ typing-inspect==0.9.0 # via libcst unidecode==1.3.8 # via pelican -urllib3==2.2.0 +urllib3==2.2.1 # via # requests # twine -virtualenv==20.25.0 +virtualenv==20.25.1 # via tox watchfiles==0.21.0 # via pelican @@ -350,7 +352,7 @@ zipp==3.17.0 # The following packages are considered to be unsafe in a requirements file: pip==24.0 # via pip-tools -setuptools==69.1.0 +setuptools==69.1.1 # via # nodeenv # pip-tools diff --git a/whole-repo-tests/test_mypy.py b/whole-repo-tests/test_mypy.py index d81f8f3fb2..9c7f355a83 100644 --- a/whole-repo-tests/test_mypy.py +++ b/whole-repo-tests/test_mypy.py @@ -40,7 +40,7 @@ def get_mypy_output(fname, *extra_args): ).stdout -def get_mypy_analysed_type(fname, val): +def get_mypy_analysed_type(fname): out = get_mypy_output(fname).rstrip() msg = "Success: no issues found in 1 source file" if out.endswith(msg): @@ -59,6 +59,8 @@ def get_mypy_analysed_type(fname, val): "hypothesis.strategies._internal.strategies.SearchStrategy", "SearchStrategy", ) + .replace("numpy._typing.", "") + .replace("numpy.", "") ) @@ -125,16 +127,23 @@ def convert_lines(): "tuples(text(), text(), text(), text(), text(), text())", "tuple[Any, ...]", ), + ( + 'arrays(dtype=np.dtype("int32"), shape=1)', + "ndarray[Any, dtype[signedinteger[_32Bit]]]", + ), + # Note: keep this in sync with the equivalent test for Pyright ], ) def test_revealed_types(tmpdir, val, expect): """Check that Mypy picks up the expected `X` in SearchStrategy[`X`].""" f = tmpdir.join(expect + ".py") f.write( + "import numpy as np\n" + "from hypothesis.extra.numpy import *\n" "from hypothesis.strategies import *\n" f"reveal_type({val})\n" # fmt: skip ) - typ = get_mypy_analysed_type(str(f.realpath()), val) + typ = get_mypy_analysed_type(str(f.realpath())) assert typ == f"SearchStrategy[{expect}]" @@ -146,7 +155,7 @@ def test_data_object_type_tracing(tmpdir): "s = d.draw(integers())\n" "reveal_type(s)\n" ) - got = get_mypy_analysed_type(str(f.realpath()), "data().draw(integers())") + got = get_mypy_analysed_type(str(f.realpath())) assert got == "int" @@ -159,7 +168,7 @@ def test_drawfn_type_tracing(tmpdir): " reveal_type(s)\n" " return s\n" ) - got = get_mypy_analysed_type(str(f.realpath()), ...) + got = get_mypy_analysed_type(str(f.realpath())) assert got == "str" @@ -172,7 +181,7 @@ def test_composite_type_tracing(tmpdir): " return x\n" "reveal_type(comp)\n" ) - got = get_mypy_analysed_type(str(f.realpath()), ...) 
+ got = get_mypy_analysed_type(str(f.realpath())) assert got == "def (x: int) -> SearchStrategy[int]" @@ -193,7 +202,7 @@ def test_functions_type_tracing(tmpdir, source, expected): f"g = functions({source}).example()\n" "reveal_type(g)\n" ) - got = get_mypy_analysed_type(str(f.realpath()), ...) + got = get_mypy_analysed_type(str(f.realpath())) assert got == expected, (got, expected) @@ -206,7 +215,7 @@ def test_settings_preserves_type(tmpdir): " return x\n" "reveal_type(f)\n" ) - got = get_mypy_analysed_type(str(f.realpath()), ...) + got = get_mypy_analysed_type(str(f.realpath())) assert got == "def (x: int) -> int" @@ -217,7 +226,7 @@ def test_stateful_bundle_generic_type(tmpdir): "b: Bundle[int] = Bundle('test')\n" "reveal_type(b.example())\n" ) - got = get_mypy_analysed_type(str(f.realpath()), ...) + got = get_mypy_analysed_type(str(f.realpath())) assert got == "int" @@ -375,7 +384,7 @@ def test_stateful_consumes_type_tracing(tmpdir, wrapper, expected): f"s = {wrapped}\n" "reveal_type(s.example())\n" ) - got = get_mypy_analysed_type(str(f.realpath()), ...) + got = get_mypy_analysed_type(str(f.realpath())) assert got == expected diff --git a/whole-repo-tests/test_pyright.py b/whole-repo-tests/test_pyright.py index a004bc2580..4f2442f54a 100644 --- a/whole-repo-tests/test_pyright.py +++ b/whole-repo-tests/test_pyright.py @@ -11,7 +11,9 @@ from __future__ import annotations import json +import re import subprocess +import sys import textwrap from pathlib import Path from typing import Any @@ -152,6 +154,81 @@ def test_pyright_issue_3348(tmp_path: Path): assert _get_pyright_errors(file) == [] +def test_numpy_arrays_strategy(tmp_path: Path): + file = tmp_path / "test.py" + file.write_text( + textwrap.dedent( + """ + import numpy as np + from hypothesis.extra.numpy import arrays + + x = arrays(dtype=np.dtype("int32"), shape=1) + """ + ), + encoding="utf-8", + ) + _write_config(tmp_path, {"typeCheckingMode": "strict"}) + errors = _get_pyright_errors(file) + print(errors) + assert errors == [] + + +@pytest.mark.parametrize( + "val,expect", + [ + ("integers()", "int"), + ("text()", "str"), + ("integers().map(str)", "str"), + ("booleans().filter(bool)", "bool"), + ("lists(none())", "List[None]"), + ("dictionaries(integers(), datetimes())", "Dict[int, datetime]"), + ("data()", "DataObject"), + ("none() | integers()", "int | None"), + ("recursive(integers(), lists)", "List[Any] | int"), + # We have overloads for up to five types, then fall back to Any. + # (why five? 
JSON atoms are None|bool|int|float|str and we do that a lot) + ("one_of(integers(), text())", "int | str"), + ( + "one_of(integers(), text(), none(), binary(), builds(list))", + "int | str | bytes | list[Unknown] | None", + ), + ( + "one_of(integers(), text(), none(), binary(), builds(list), builds(dict))", + "Any", + ), + ("tuples()", "Tuple[()]"), + ("tuples(integers())", "Tuple[int]"), + ("tuples(integers(), text())", "Tuple[int, str]"), + ( + "tuples(integers(), text(), integers(), text(), integers())", + "Tuple[int, str, int, str, int]", + ), + ( + "tuples(text(), text(), text(), text(), text(), text())", + "Tuple[Any, ...]", + ), + ( + 'arrays(dtype=np.dtype("int32"), shape=1)', + "ndarray[Any, dtype[signedinteger[_32Bit]]]", + ), + # Note: keep this in sync with the equivalent test for Mypy + ], +) +def test_revealed_types(tmp_path, val, expect): + """Check that Pyright picks up the expected `X` in SearchStrategy[`X`].""" + f = tmp_path / (expect + ".py") + f.write_text( + "import numpy as np\n" + "from hypothesis.extra.numpy import *\n" + "from hypothesis.strategies import *\n" + f"reveal_type({val})\n", # fmt: skip + encoding="utf-8", + ) + _write_config(tmp_path, {"reportWildcardImportFromLibrary ": "none"}) + typ = get_pyright_analysed_type(f) + assert typ == f"SearchStrategy[{expect}]" + + def test_pyright_tuples_pos_args_only(tmp_path: Path): file = tmp_path / "test.py" file.write_text( @@ -232,7 +309,7 @@ def __init__(self) -> None: def _get_pyright_output(file: Path) -> dict[str, Any]: proc = subprocess.run( - [tool_path("pyright"), "--outputjson"], + [tool_path("pyright"), "--outputjson", f"--pythonpath={sys.executable}"], cwd=file.parent, encoding="utf-8", text=True, @@ -249,6 +326,14 @@ def _get_pyright_errors(file: Path) -> list[dict[str, Any]]: return _get_pyright_output(file)["generalDiagnostics"] +def get_pyright_analysed_type(fname): + out, *rest = _get_pyright_errors(fname) + print(out, rest) + assert not rest + assert out["severity"] == "information" + return re.fullmatch(r'Type of ".+" is "(.+)"', out["message"]).group(1) + + def _write_config(config_dir: Path, data: dict[str, Any] | None = None): config = {"extraPaths": [str(PYTHON_SRC)], **(data or {})} (config_dir / "pyrightconfig.json").write_text(json.dumps(config), encoding="utf-8") From ede32b2fb014dd1757fcbada8a78c17b5433708d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 00:44:02 -0500 Subject: [PATCH 19/43] update for new ir structure --- .../hypothesis/internal/conjecture/data.py | 36 +++++++------------ .../hypothesis/internal/conjecture/engine.py | 4 +-- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index da114da772..db82e6d790 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -930,7 +930,6 @@ class ConjectureResult: status: Status = attr.ib() interesting_origin: Optional[InterestingOrigin] = attr.ib() buffer: bytes = attr.ib() - ir_tree: IRTree = attr.ib() blocks: Blocks = attr.ib() output: str = attr.ib() extra_information: Optional[ExtraInformation] = attr.ib() @@ -1516,7 +1515,7 @@ def for_buffer( @classmethod def for_ir_tree( cls, - ir_tree_prefix: IRTree, + ir_tree_prefix: List[IRNode], *, observer: Optional[DataObserver] = None, provider: type = PrimitiveProvider, @@ -1538,7 +1537,7 @@ def __init__( random: Optional[Random], observer: 
Optional[DataObserver] = None, provider: type = PrimitiveProvider, - ir_tree_prefix: Optional[IRTree] = None, + ir_tree_prefix: Optional[List[IRNode]] = None, ) -> None: if observer is None: observer = DataObserver() @@ -1605,11 +1604,7 @@ def __init__( self.extra_information = ExtraInformation() - self.ir_tree = IRTree() - self.ir_tree_leaves = ( - None if ir_tree_prefix is None else ir_tree_prefix.leaves() - ) - self.ir_tree_leaves_index = 0 + self.ir_tree_nodes = ir_tree_prefix self.start_example(TOP_LABEL) def __repr__(self): @@ -1672,7 +1667,7 @@ def draw_integer( }, ) - if self.ir_tree_leaves is not None and observe: + if self.ir_tree_nodes is not None and observe: forced = self._pop_ir_tree_value("integer", kwargs) value = self.provider.draw_integer(**kwargs, forced=forced) @@ -1719,7 +1714,7 @@ def draw_float( }, ) - if self.ir_tree_leaves is not None and observe: + if self.ir_tree_nodes is not None and observe: forced = self._pop_ir_tree_value("float", kwargs) value = self.provider.draw_float(**kwargs, forced=forced) @@ -1751,7 +1746,7 @@ def draw_string( "max_size": max_size, }, ) - if self.ir_tree_leaves is not None and observe: + if self.ir_tree_nodes is not None and observe: forced = self._pop_ir_tree_value("string", kwargs) value = self.provider.draw_string(**kwargs, forced=forced) @@ -1777,7 +1772,7 @@ def draw_bytes( kwargs: BytesKWargs = self._pooled_kwargs("bytes", {"size": size}) - if self.ir_tree_leaves is not None and observe: + if self.ir_tree_nodes is not None and observe: forced = self._pop_ir_tree_value("bytes", kwargs) value = self.provider.draw_bytes(**kwargs, forced=forced) @@ -1805,7 +1800,7 @@ def draw_boolean( kwargs: BooleanKWargs = self._pooled_kwargs("boolean", {"p": p}) - if self.ir_tree_leaves is not None and observe: + if self.ir_tree_nodes is not None and observe: forced = self._pop_ir_tree_value("boolean", kwargs) value = self.provider.draw_boolean(**kwargs, forced=forced) @@ -1839,13 +1834,12 @@ def _pooled_kwargs(self, ir_type, kwargs): return kwargs def _pop_ir_tree_value(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRType: - assert self.ir_tree_leaves is not None - leaf = self.ir_tree_leaves[self.ir_tree_leaves_index] - assert leaf.ir_type == ir_type - assert kwargs == leaf.kwargs + assert self.ir_tree_nodes is not None + node = self.ir_tree_nodes.pop(0) + assert node.ir_type == ir_type + assert kwargs == node.kwargs - self.ir_tree_leaves_index += 1 - return leaf.value + return node.value def as_result(self) -> Union[ConjectureResult, _Overrun]: """Convert the result of running this test into @@ -1859,7 +1853,6 @@ def as_result(self) -> Union[ConjectureResult, _Overrun]: status=self.status, interesting_origin=self.interesting_origin, buffer=self.buffer, - ir_tree=self.ir_tree, examples=self.examples, blocks=self.blocks, output=self.output, @@ -1952,7 +1945,6 @@ def start_example(self, label: int) -> None: self.max_depth = self.depth self.__example_record.start_example(label) self.labels_for_structure_stack.append({label}) - self.ir_tree.start_example(label) def stop_example(self, *, discard: bool = False) -> None: if self.frozen: @@ -1998,8 +1990,6 @@ def stop_example(self, *, discard: bool = False) -> None: self.observer.kill_branch() - self.ir_tree.stop_example() - @property def examples(self) -> Examples: assert self.frozen diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index ac6cfb3fec..bf0dedf9c3 100644 --- 
a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -284,7 +284,7 @@ def test_function(self, data): if data.status == Status.INTERESTING: if self.settings.backend != "hypothesis": - for node in data.ir_tree.leaves(): + for node in data.examples.ir_tree_nodes: value = data.provider.post_test_case_hook(node.value) # require providers to return something valid here. assert node.value is not None @@ -292,7 +292,7 @@ def test_function(self, data): # drive the ir tree through the test function to convert it # to a buffer - data = ConjectureData.for_ir_tree(data.ir_tree) + data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes) self.__stoppable_test_function(data) # ir tree conversion works by using forced=. This works great, From 58eaf7855183784d621cb2c97ce36f1894c5af6d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 16:42:28 -0500 Subject: [PATCH 20/43] implement fake_forced --- .../hypothesis/internal/conjecture/data.py | 225 ++++++++++++++---- .../hypothesis/internal/conjecture/engine.py | 9 - .../hypothesis/internal/conjecture/utils.py | 24 +- .../tests/conjecture/test_alt_backend.py | 15 +- .../tests/cover/test_health_checks.py | 4 +- 5 files changed, 208 insertions(+), 69 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index db82e6d790..1131dbca36 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -978,7 +978,13 @@ def post_test_case_hook(self, value): def per_test_case_context_manager(self): return contextlib.nullcontext() - def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: + def draw_boolean( + self, + p: float = 0.5, + *, + forced: Optional[bool] = None, + fake_forced: bool = False, + ) -> bool: """Return True with probability p (assuming a uniform generator), shrinking towards False. 
If ``forced`` is set to a non-None value, this will always return that value but will write choices appropriate to having @@ -1046,7 +1052,9 @@ def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool partial = True i = self._cd.draw_bits( - bits, forced=None if forced is None else int(forced) + bits, + forced=None if forced is None else int(forced), + fake_forced=fake_forced, ) # We always choose the region that causes us to repeat the loop as @@ -1090,6 +1098,7 @@ def draw_integer( weights: Optional[Sequence[float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, + fake_forced: bool = False, ) -> int: if min_value is not None: shrink_towards = max(min_value, shrink_towards) @@ -1111,7 +1120,7 @@ def draw_integer( forced_idx = forced - shrink_towards else: forced_idx = shrink_towards + gap - forced - idx = sampler.sample(self._cd, forced=forced_idx) + idx = sampler.sample(self._cd, forced=forced_idx, fake_forced=fake_forced) # For range -2..2, interpret idx = 0..4 as [0, 1, 2, -1, -2] if idx <= gap: @@ -1120,7 +1129,7 @@ def draw_integer( return shrink_towards - (idx - gap) if min_value is None and max_value is None: - return self._draw_unbounded_integer(forced=forced) + return self._draw_unbounded_integer(forced=forced, fake_forced=fake_forced) if min_value is None: assert max_value is not None # make mypy happy @@ -1128,7 +1137,8 @@ def draw_integer( while max_value < probe: self._cd.start_example(ONE_BOUND_INTEGERS_LABEL) probe = shrink_towards + self._draw_unbounded_integer( - forced=None if forced is None else forced - shrink_towards + forced=None if forced is None else forced - shrink_towards, + fake_forced=fake_forced, ) self._cd.stop_example() return probe @@ -1139,7 +1149,8 @@ def draw_integer( while probe < min_value: self._cd.start_example(ONE_BOUND_INTEGERS_LABEL) probe = shrink_towards + self._draw_unbounded_integer( - forced=None if forced is None else forced - shrink_towards + forced=None if forced is None else forced - shrink_towards, + fake_forced=fake_forced, ) self._cd.stop_example() return probe @@ -1149,6 +1160,7 @@ def draw_integer( max_value, center=shrink_towards, forced=forced, + fake_forced=fake_forced, ) def draw_float( @@ -1163,6 +1175,7 @@ def draw_float( # width: Literal[16, 32, 64] = 64, # exclude_min and exclude_max handled higher up, forced: Optional[float] = None, + fake_forced: bool = False, ) -> float: ( sampler, @@ -1185,11 +1198,17 @@ def draw_float( # i == 0 is able to produce all possible floats, and the forcing # logic is simpler if we assume this choice. 
forced_i = None if forced is None else 0 - i = sampler.sample(self._cd, forced=forced_i) if sampler else 0 + i = ( + sampler.sample(self._cd, forced=forced_i, fake_forced=fake_forced) + if sampler + else 0 + ) self._cd.start_example(DRAW_FLOAT_LABEL) if i == 0: result = self._draw_float( - forced_sign_bit=forced_sign_bit, forced=forced + forced_sign_bit=forced_sign_bit, + forced=forced, + fake_forced=fake_forced, ) if allow_nan and math.isnan(result): clamped = result @@ -1202,12 +1221,12 @@ def draw_float( if clamped != result and not (math.isnan(result) and allow_nan): self._cd.stop_example() self._cd.start_example(DRAW_FLOAT_LABEL) - self._draw_float(forced=clamped) + self._draw_float(forced=clamped, fake_forced=fake_forced) result = clamped else: result = nasty_floats[i - 1] - self._draw_float(forced=result) + self._draw_float(forced=result, fake_forced=fake_forced) self._cd.stop_example() # (DRAW_FLOAT_LABEL) self._cd.stop_example() # (FLOAT_STRATEGY_DO_DRAW_LABEL) @@ -1220,6 +1239,7 @@ def draw_string( min_size: int = 0, max_size: Optional[int] = None, forced: Optional[str] = None, + fake_forced: bool = False, ) -> str: if max_size is None: max_size = DRAW_STRING_DEFAULT_MAX_SIZE @@ -1238,6 +1258,7 @@ def draw_string( max_size=max_size, average_size=average_size, forced=None if forced is None else len(forced), + fake_forced=fake_forced, observe=False, ) while elements.more(): @@ -1248,30 +1269,47 @@ def draw_string( if len(intervals) > 256: if self.draw_boolean( - 0.2, forced=None if forced_i is None else forced_i > 255 + 0.2, + forced=None if forced_i is None else forced_i > 255, + fake_forced=fake_forced, ): i = self._draw_bounded_integer( - 256, len(intervals) - 1, forced=forced_i + 256, + len(intervals) - 1, + forced=forced_i, + fake_forced=fake_forced, ) else: - i = self._draw_bounded_integer(0, 255, forced=forced_i) + i = self._draw_bounded_integer( + 0, 255, forced=forced_i, fake_forced=fake_forced + ) else: - i = self._draw_bounded_integer(0, len(intervals) - 1, forced=forced_i) + i = self._draw_bounded_integer( + 0, len(intervals) - 1, forced=forced_i, fake_forced=fake_forced + ) chars.append(intervals.char_in_shrink_order(i)) return "".join(chars) - def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes: + def draw_bytes( + self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False + ) -> bytes: forced_i = None if forced is not None: forced_i = int_from_bytes(forced) size = len(forced) - return self._cd.draw_bits(8 * size, forced=forced_i).to_bytes(size, "big") + return self._cd.draw_bits( + 8 * size, forced=forced_i, fake_forced=fake_forced + ).to_bytes(size, "big") def _draw_float( - self, forced_sign_bit: Optional[int] = None, *, forced: Optional[float] = None + self, + forced_sign_bit: Optional[int] = None, + *, + forced: Optional[float] = None, + fake_forced: bool = False, ) -> float: """ Helper for draw_float which draws a random 64-bit float. @@ -1280,15 +1318,21 @@ def _draw_float( # sign_aware_lte(forced, -0.0) does not correctly handle the # math.nan case here. 
forced_sign_bit = math.copysign(1, forced) == -1 - is_negative = self._cd.draw_bits(1, forced=forced_sign_bit) + is_negative = self._cd.draw_bits( + 1, forced=forced_sign_bit, fake_forced=fake_forced + ) f = lex_to_float( self._cd.draw_bits( - 64, forced=None if forced is None else float_to_lex(abs(forced)) + 64, + forced=None if forced is None else float_to_lex(abs(forced)), + fake_forced=fake_forced, ) ) return -f if is_negative else f - def _draw_unbounded_integer(self, *, forced: Optional[int] = None) -> int: + def _draw_unbounded_integer( + self, *, forced: Optional[int] = None, fake_forced: bool = False + ) -> int: forced_i = None if forced is not None: # Using any bucket large enough to contain this integer would be a @@ -1303,7 +1347,9 @@ def _draw_unbounded_integer(self, *, forced: Optional[int] = None) -> int: size = min(size for size in INT_SIZES if bit_size <= size) forced_i = INT_SIZES.index(size) - size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd, forced=forced_i)] + size = INT_SIZES[ + INT_SIZES_SAMPLER.sample(self._cd, forced=forced_i, fake_forced=fake_forced) + ] forced_r = None if forced is not None: @@ -1313,7 +1359,7 @@ def _draw_unbounded_integer(self, *, forced: Optional[int] = None) -> int: forced_r = -forced_r forced_r |= 1 - r = self._cd.draw_bits(size, forced=forced_r) + r = self._cd.draw_bits(size, forced=forced_r, fake_forced=fake_forced) sign = r & 1 r >>= 1 if sign: @@ -1327,6 +1373,7 @@ def _draw_bounded_integer( *, center: Optional[int] = None, forced: Optional[int] = None, + fake_forced: bool = False, ) -> int: assert lower <= upper assert forced is None or lower <= forced <= upper @@ -1348,7 +1395,9 @@ def _draw_bounded_integer( above = True else: force_above = None if forced is None else forced < center - above = not self._cd.draw_bits(1, forced=force_above) + above = not self._cd.draw_bits( + 1, forced=force_above, fake_forced=fake_forced + ) if above: gap = upper - center @@ -1361,7 +1410,7 @@ def _draw_bounded_integer( probe = gap + 1 if bits > 24 and self.draw_boolean( - 7 / 8, forced=None if forced is None else False + 7 / 8, forced=None if forced is None else False, fake_forced=fake_forced ): # For large ranges, we combine the uniform random distribution from draw_bits # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our @@ -1372,7 +1421,9 @@ def _draw_bounded_integer( while probe > gap: self._cd.start_example(INTEGER_RANGE_DRAW_LABEL) probe = self._cd.draw_bits( - bits, forced=None if forced is None else abs(forced - center) + bits, + forced=None if forced is None else abs(forced - center), + fake_forced=fake_forced, ) self._cd.stop_example() @@ -1614,7 +1665,8 @@ def __repr__(self): ", frozen" if self.frozen else "", ) - # A bit of explanation of the `observe` argument in our draw_* functions. + # A bit of explanation of the `observe` and `fake_forced` arguments in our + # draw_* functions. # # There are two types of draws: sub-ir and super-ir. For instance, some ir # nodes use `many`, which in turn calls draw_boolean. But some strategies @@ -1626,6 +1678,17 @@ def __repr__(self): # # `observe` formalizes this distinction. The draw will only be written to # the DataTree if observe is True. + # + # `fake_forced` deals with a different problem. We use `forced=` to convert + # ir prefixes, which are potentially from other backends, into our backing + # bits representation. 
This works fine, except using `forced=` in this way + # also sets `was_forced=True` for all blocks, even those that weren't forced + # in the traditional way. The shrinker chokes on this due to thinking that + # nothing can be modified. + # + # Setting `fake_forced` to true says that yes, we want to force a particular + # value to be returned, but we don't want to treat that block as fixed for + # e.g. the shrinker. def draw_integer( self, @@ -1636,6 +1699,7 @@ def draw_integer( weights: Optional[Sequence[float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, + fake_forced: bool = False, observe: bool = True, ) -> int: # Validate arguments @@ -1668,15 +1732,23 @@ def draw_integer( ) if self.ir_tree_nodes is not None and observe: - forced = self._pop_ir_tree_value("integer", kwargs) + node = self._pop_ir_tree_node("integer", kwargs) + assert isinstance(node.value, int) + forced = node.value + fake_forced = not node.was_forced - value = self.provider.draw_integer(**kwargs, forced=forced) + value = self.provider.draw_integer( + **kwargs, forced=forced, fake_forced=fake_forced + ) if observe: self.observer.draw_integer( - value, kwargs=kwargs, was_forced=forced is not None + value, kwargs=kwargs, was_forced=forced is not None and not fake_forced ) self.__example_record.record_ir_draw( - "integer", value, kwargs=kwargs, was_forced=forced is not None + "integer", + value, + kwargs=kwargs, + was_forced=forced is not None and not fake_forced, ) return value @@ -1692,6 +1764,7 @@ def draw_float( # width: Literal[16, 32, 64] = 64, # exclude_min and exclude_max handled higher up, forced: Optional[float] = None, + fake_forced: bool = False, observe: bool = True, ) -> float: assert smallest_nonzero_magnitude > 0 @@ -1715,15 +1788,23 @@ def draw_float( ) if self.ir_tree_nodes is not None and observe: - forced = self._pop_ir_tree_value("float", kwargs) + node = self._pop_ir_tree_node("float", kwargs) + assert isinstance(node.value, float) + forced = node.value + fake_forced = not node.was_forced - value = self.provider.draw_float(**kwargs, forced=forced) + value = self.provider.draw_float( + **kwargs, forced=forced, fake_forced=fake_forced + ) if observe: self.observer.draw_float( - value, kwargs=kwargs, was_forced=forced is not None + value, kwargs=kwargs, was_forced=forced is not None and not fake_forced ) self.__example_record.record_ir_draw( - "float", value, kwargs=kwargs, was_forced=forced is not None + "float", + value, + kwargs=kwargs, + was_forced=forced is not None and not fake_forced, ) return value @@ -1734,6 +1815,7 @@ def draw_string( min_size: int = 0, max_size: Optional[int] = None, forced: Optional[str] = None, + fake_forced: bool = False, observe: bool = True, ) -> str: assert forced is None or min_size <= len(forced) @@ -1747,15 +1829,23 @@ def draw_string( }, ) if self.ir_tree_nodes is not None and observe: - forced = self._pop_ir_tree_value("string", kwargs) + node = self._pop_ir_tree_node("string", kwargs) + assert isinstance(node.value, str) + forced = node.value + fake_forced = not node.was_forced - value = self.provider.draw_string(**kwargs, forced=forced) + value = self.provider.draw_string( + **kwargs, forced=forced, fake_forced=fake_forced + ) if observe: self.observer.draw_string( - value, kwargs=kwargs, was_forced=forced is not None + value, kwargs=kwargs, was_forced=forced is not None and not fake_forced ) self.__example_record.record_ir_draw( - "string", value, kwargs=kwargs, was_forced=forced is not None + "string", + value, + kwargs=kwargs, + 
was_forced=forced is not None and not fake_forced, ) return value @@ -1765,6 +1855,7 @@ def draw_bytes( size: int, *, forced: Optional[bytes] = None, + fake_forced: bool = False, observe: bool = True, ) -> bytes: assert forced is None or len(forced) == size @@ -1773,20 +1864,33 @@ def draw_bytes( kwargs: BytesKWargs = self._pooled_kwargs("bytes", {"size": size}) if self.ir_tree_nodes is not None and observe: - forced = self._pop_ir_tree_value("bytes", kwargs) + node = self._pop_ir_tree_node("bytes", kwargs) + assert isinstance(node.value, bytes) + forced = node.value + fake_forced = not node.was_forced - value = self.provider.draw_bytes(**kwargs, forced=forced) + value = self.provider.draw_bytes( + **kwargs, forced=forced, fake_forced=fake_forced + ) if observe: self.observer.draw_bytes( - value, kwargs=kwargs, was_forced=forced is not None + value, kwargs=kwargs, was_forced=forced is not None and not fake_forced ) self.__example_record.record_ir_draw( - "bytes", value, kwargs=kwargs, was_forced=forced is not None + "bytes", + value, + kwargs=kwargs, + was_forced=forced is not None and not fake_forced, ) return value def draw_boolean( - self, p: float = 0.5, *, forced: Optional[bool] = None, observe: bool = True + self, + p: float = 0.5, + *, + forced: Optional[bool] = None, + observe: bool = True, + fake_forced: bool = False, ) -> bool: # Internally, we treat probabilities lower than 1 / 2**64 as # unconditionally false. @@ -1801,15 +1905,23 @@ def draw_boolean( kwargs: BooleanKWargs = self._pooled_kwargs("boolean", {"p": p}) if self.ir_tree_nodes is not None and observe: - forced = self._pop_ir_tree_value("boolean", kwargs) + node = self._pop_ir_tree_node("boolean", kwargs) + assert isinstance(node.value, bool) + forced = node.value + fake_forced = not node.was_forced - value = self.provider.draw_boolean(**kwargs, forced=forced) + value = self.provider.draw_boolean( + **kwargs, forced=forced, fake_forced=fake_forced + ) if observe: self.observer.draw_boolean( - value, kwargs=kwargs, was_forced=forced is not None + value, kwargs=kwargs, was_forced=forced is not None and not fake_forced ) self.__example_record.record_ir_draw( - "boolean", value, kwargs=kwargs, was_forced=forced is not None + "boolean", + value, + kwargs=kwargs, + was_forced=forced is not None and not fake_forced, ) return value @@ -1833,13 +1945,13 @@ def _pooled_kwargs(self, ir_type, kwargs): POOLED_KWARGS_CACHE[key] = kwargs return kwargs - def _pop_ir_tree_value(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRType: + def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode: assert self.ir_tree_nodes is not None node = self.ir_tree_nodes.pop(0) assert node.ir_type == ir_type assert kwargs == node.kwargs - return node.value + return node def as_result(self) -> Union[ConjectureResult, _Overrun]: """Convert the result of running this test into @@ -2021,13 +2133,22 @@ def choice( values: Sequence[T], *, forced: Optional[T] = None, + fake_forced: bool = False, observe: bool = True, ) -> T: forced_i = None if forced is None else values.index(forced) - i = self.draw_integer(0, len(values) - 1, forced=forced_i, observe=observe) + i = self.draw_integer( + 0, + len(values) - 1, + forced=forced_i, + fake_forced=fake_forced, + observe=observe, + ) return values[i] - def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: + def draw_bits( + self, n: int, *, forced: Optional[int] = None, fake_forced: bool = False + ) -> int: """Return an ``n``-bit integer from the underlying source of bytes. 
If ``forced`` is set to an integer will instead ignore the underlying source and simulate a draw as if it had @@ -2069,7 +2190,7 @@ def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: self.buffer.extend(buf) self.index = len(self.buffer) - if forced is not None: + if forced is not None and not fake_forced: self.forced_indices.update(range(initial, self.index)) self.blocks.add_endpoint(self.index) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index bf0dedf9c3..d02593e066 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -295,15 +295,6 @@ def test_function(self, data): data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes) self.__stoppable_test_function(data) - # ir tree conversion works by using forced=. This works great, - # but has the side effect of causing *all* blocks to be marked - # as forced. The shrinker in turn thinks these blocks are - # trivial and avoids shrinking them. - # We'll drive the buffer through the test function one more - # time to set up the blocks correctly for the shrinker. - data = ConjectureData.for_buffer(data.buffer) - self.__stoppable_test_function(data) - self.__data_cache[data.buffer] = data.as_result() key = data.interesting_origin diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py index 5e77437a78..7ce126f2f0 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py @@ -166,7 +166,12 @@ def __init__(self, weights: Sequence[float], *, observe: bool = True): self.table.append((base, alternate, alternate_chance)) self.table.sort() - def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int: + def sample( + self, + data: "ConjectureData", + forced: Optional[int] = None, + fake_forced: bool = False, + ) -> int: data.start_example(SAMPLE_IN_SAMPLER_LABEL) forced_choice = ( # pragma: no branch # https://github.com/nedbat/coveragepy/issues/1617 None @@ -178,7 +183,10 @@ def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int: ) ) base, alternate, alternate_chance = data.choice( - self.table, forced=forced_choice, observe=self.observe + self.table, + forced=forced_choice, + fake_forced=fake_forced, + observe=self.observe, ) forced_use_alternate = None if forced is not None: @@ -189,7 +197,10 @@ def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int: assert forced == base or forced_use_alternate use_alternate = data.draw_boolean( - alternate_chance, forced=forced_use_alternate, observe=self.observe + alternate_chance, + forced=forced_use_alternate, + fake_forced=fake_forced, + observe=self.observe, ) data.stop_example() if use_alternate: @@ -224,6 +235,7 @@ def __init__( average_size: Union[int, float], *, forced: Optional[int] = None, + fake_forced: bool = False, observe: bool = True, ) -> None: assert 0 <= min_size <= average_size <= max_size @@ -232,6 +244,7 @@ def __init__( self.max_size = max_size self.data = data self.forced_size = forced + self.fake_forced = fake_forced self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size) self.count = 0 self.rejections = 0 @@ -267,7 +280,10 @@ def more(self) -> bool: elif self.forced_size is not None: forced_result = self.count < self.forced_size 
should_continue = self.data.draw_boolean( - self.p_continue, forced=forced_result, observe=self.observe + self.p_continue, + forced=forced_result, + fake_forced=self.fake_forced, + observe=self.observe, ) if should_continue: diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 7b911ae11f..833001bf55 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -35,7 +35,13 @@ class PrngProvider(PrimitiveProvider): def __init__(self, conjecturedata: "ConjectureData", /) -> None: self.prng = Random(conjecturedata.buffer or None) - def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: + def draw_boolean( + self, + p: float = 0.5, + *, + forced: Optional[bool] = None, + fake_forced: bool = False, + ) -> bool: if forced is not None: return forced return self.prng.random() < p @@ -49,6 +55,7 @@ def draw_integer( weights: Optional[Sequence[float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, + fake_forced: bool = False, ) -> int: assert isinstance(shrink_towards, int) # otherwise ignored here if forced is not None: @@ -80,6 +87,7 @@ def draw_float( allow_nan: bool = True, smallest_nonzero_magnitude: float, forced: Optional[float] = None, + fake_forced: bool = False, ) -> float: if forced is not None: return forced @@ -116,6 +124,7 @@ def draw_string( min_size: int = 0, max_size: Optional[int] = None, forced: Optional[str] = None, + fake_forced: bool = False, ) -> str: if forced is not None: return forced @@ -124,7 +133,9 @@ def draw_string( ) return "".join(map(chr, self.prng.choices(intervals, k=size))) - def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes: + def draw_bytes( + self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False + ) -> bytes: if forced is not None: return forced return self.prng.randbytes(size) diff --git a/hypothesis-python/tests/cover/test_health_checks.py b/hypothesis-python/tests/cover/test_health_checks.py index 5cc9217d13..a4165b715f 100644 --- a/hypothesis-python/tests/cover/test_health_checks.py +++ b/hypothesis-python/tests/cover/test_health_checks.py @@ -237,9 +237,9 @@ def test_does_not_trigger_health_check_on_simple_strategies(monkeypatch): # We need to make drawing data artificially slow in order to trigger this # effect. This isn't actually slow because time is fake in our CI, but # we need it to pretend to be. 
- def draw_bits(self, n, forced=None): + def draw_bits(self, n, forced=None, fake_forced=False): time.sleep(0.001) - return existing_draw_bits(self, n, forced=forced) + return existing_draw_bits(self, n, forced=forced, fake_forced=fake_forced) monkeypatch.setattr(ConjectureData, "draw_bits", draw_bits) From 308ca92356fd864db258479f11793b978e614b07 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 16:42:40 -0500 Subject: [PATCH 21/43] add test for backend shrinking ability --- .../tests/conjecture/test_alt_backend.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 833001bf55..450175a2fe 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -26,6 +26,8 @@ from hypothesis.internal.floats import SIGNALING_NAN from hypothesis.internal.intervalsets import IntervalSet +from tests.common.debug import minimal + class PrngProvider(PrimitiveProvider): # A test-only implementation of the PrimitiveProvider interface, which uses @@ -182,3 +184,12 @@ def test(value): assert db.data buffers = {x for x in db.data[next(iter(db.data))] if x} assert buffers, db.data + + +def test_backend_can_shrink(): + with temp_register_backend(): + n = minimal( + st.integers(), lambda n: n >= 123456, settings=settings(backend="prng") + ) + + assert n == 123456 From 443c3bb3b0428636f23893c380af78d1276d4ad6 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 19:49:47 -0500 Subject: [PATCH 22/43] wording --- .../src/hypothesis/internal/conjecture/engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index d02593e066..06f2990b2b 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -287,14 +287,15 @@ def test_function(self, data): for node in data.examples.ir_tree_nodes: value = data.provider.post_test_case_hook(node.value) # require providers to return something valid here. - assert node.value is not None + assert ( + value is not None + ), "providers must return a non-null value from post_test_case_hook" node.value = value # drive the ir tree through the test function to convert it # to a buffer data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes) self.__stoppable_test_function(data) - self.__data_cache[data.buffer] = data.as_result() key = data.interesting_origin From e6366757d3212c5f24fd9ccf93b869474cd3319e Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 20:44:24 -0500 Subject: [PATCH 23/43] extract PrimitiveProvider to interface, add lifetime attr --- .../hypothesis/internal/conjecture/data.py | 103 ++++++++++++++++-- .../hypothesis/internal/conjecture/engine.py | 40 ++++--- .../tests/conjecture/test_alt_backend.py | 7 +- 3 files changed, 128 insertions(+), 22 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 1131dbca36..40c7ac44ad 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -8,6 +8,7 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+import abc import contextlib import math import time @@ -957,7 +958,7 @@ def as_result(self) -> "ConjectureResult": BYTE_MASKS[0] = 255 -class PrimitiveProvider: +class PrimitiveProvider(abc.ABC): # This is the low-level interface which would also be implemented # by e.g. CrossHair, by an Atheris-hypothesis integration, etc. # We'd then build the structured tree handling, database and replay @@ -965,7 +966,26 @@ class PrimitiveProvider: # # See https://github.com/HypothesisWorks/hypothesis/issues/3086 - def __init__(self, conjecturedata: "ConjectureData", /) -> None: + # How long a provider instance is used for. One of test_function or + # test_case. Defaults to test_function. + # + # If test_function, a single provider instance will be instantiated and used + # for the entirety of each test function. I.e., roughly one provider per + # @given annotation. This can be useful if you need to track state over many + # executions to a test function. + # + # This lifetime will cause None to be passed for the ConjectureData object + # in PrimitiveProvider.__init__, because that object is instantiated per + # test case. + # + # If test_case, a new provider instance will be instantiated and used each + # time hypothesis tries to generate a new input to the test function. This + # lifetime can access the passed ConjectureData object. + # + # Non-hypothesis providers probably want to set a lifetime of test_case. + lifetime = "test_function" + + def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: self._cd = conjecturedata def post_test_case_hook(self, value): @@ -978,6 +998,73 @@ def post_test_case_hook(self, value): def per_test_case_context_manager(self): return contextlib.nullcontext() + @abc.abstractmethod + def draw_boolean( + self, + p: float = 0.5, + *, + forced: Optional[bool] = None, + fake_forced: bool = False, + ) -> bool: + pass + + @abc.abstractmethod + def draw_integer( + self, + min_value: Optional[int] = None, + max_value: Optional[int] = None, + *, + # weights are for choosing an element index from a bounded range + weights: Optional[Sequence[float]] = None, + shrink_towards: int = 0, + forced: Optional[int] = None, + fake_forced: bool = False, + ) -> int: + pass + + @abc.abstractmethod + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + # TODO: consider supporting these float widths at the IR level in the + # future. + # width: Literal[16, 32, 64] = 64, + # exclude_min and exclude_max handled higher up, + forced: Optional[float] = None, + fake_forced: bool = False, + ) -> float: + pass + + @abc.abstractmethod + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: Optional[int] = None, + forced: Optional[str] = None, + fake_forced: bool = False, + ) -> str: + pass + + @abc.abstractmethod + def draw_bytes( + self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False + ) -> bytes: + pass + + +class HypothesisProvider(PrimitiveProvider): + lifetime = "test_case" + + def __init__(self, conjecturedata: Optional["ConjectureData"], /): + assert conjecturedata is not None + super().__init__(conjecturedata) + def draw_boolean( self, p: float = 0.5, @@ -1546,7 +1633,7 @@ def permitted(f): # NOTE: this is a temporary interface. We DO NOT promise to continue supporting it! 
# (but if you want to experiment and don't mind breakage, here you go) AVAILABLE_PROVIDERS = { - "hypothesis": "hypothesis.internal.conjecture.data.PrimitiveProvider", + "hypothesis": "hypothesis.internal.conjecture.data.HypothesisProvider", } @@ -1557,7 +1644,7 @@ def for_buffer( buffer: Union[List[int], bytes], *, observer: Optional[DataObserver] = None, - provider: type = PrimitiveProvider, + provider: Union[type, PrimitiveProvider] = HypothesisProvider, ) -> "ConjectureData": return cls( len(buffer), buffer, random=None, observer=observer, provider=provider @@ -1569,7 +1656,7 @@ def for_ir_tree( ir_tree_prefix: List[IRNode], *, observer: Optional[DataObserver] = None, - provider: type = PrimitiveProvider, + provider: Union[type, PrimitiveProvider] = HypothesisProvider, ) -> "ConjectureData": return cls( 8 * 1024, @@ -1587,7 +1674,7 @@ def __init__( *, random: Optional[Random], observer: Optional[DataObserver] = None, - provider: type = PrimitiveProvider, + provider: Union[type, PrimitiveProvider] = HypothesisProvider, ir_tree_prefix: Optional[List[IRNode]] = None, ) -> None: if observer is None: @@ -1621,7 +1708,9 @@ def __init__( self._stateful_run_times: "DefaultDict[str, float]" = defaultdict(float) self.max_depth = 0 self.has_discards = False - self.provider = provider(self) + + self.provider = provider(self) if isinstance(provider, type) else provider + assert isinstance(self.provider, PrimitiveProvider) self.__result: "Optional[ConjectureResult]" = None diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 06f2990b2b..86a75bcc3a 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -21,7 +21,7 @@ from hypothesis import HealthCheck, Phase, Verbosity, settings as Settings from hypothesis._settings import local_settings -from hypothesis.errors import StopTest +from hypothesis.errors import InvalidArgument, StopTest from hypothesis.internal.cache import LRUReusedCache from hypothesis.internal.compat import ceil, int_from_bytes from hypothesis.internal.conjecture.data import ( @@ -29,8 +29,8 @@ ConjectureData, ConjectureResult, DataObserver, + HypothesisProvider, Overrun, - PrimitiveProvider, Status, ) from hypothesis.internal.conjecture.datatree import DataTree, PreviouslyUnseenBehaviour @@ -113,6 +113,19 @@ class RunIsComplete(Exception): pass +def _get_provider(backend): + mname, cname = AVAILABLE_PROVIDERS[backend].rsplit(".", 1) + provider_cls = getattr(importlib.import_module(mname), cname) + if provider_cls.lifetime == "test_function": + return provider_cls(None) + elif provider_cls.lifetime == "test_case": + return provider_cls + else: + raise InvalidArgument( + f"invalid lifetime {provider_cls.lifetime} for provider {provider_cls.__name__}. Expected one of 'test_function', 'test_case'." 
+ ) + + class ConjectureRunner: def __init__( self, @@ -150,6 +163,8 @@ def __init__( self.tree = DataTree() + self.provider = _get_provider(self.settings.backend) + self.best_observed_targets = defaultdict(lambda: NO_SCORE) self.best_examples_of_observed_targets = {} @@ -170,7 +185,7 @@ def __init__( self.__data_cache = LRUReusedCache(CACHE_SIZE) self.__pending_call_explanation = None - self._switch_to_primitive_provider = False + self._switch_to_hypothesis_provider = False def explain_next_call_as(self, explanation): self.__pending_call_explanation = explanation @@ -928,32 +943,29 @@ def pareto_optimise(self): def _run(self): # have to use the primitive provider to interpret database bits... - self._switch_to_primitive_provider = True + self._switch_to_hypothesis_provider = True with self._log_phase_statistics("reuse"): self.reuse_existing_examples() # ...but we should use the supplied provider when generating... - self._switch_to_primitive_provider = False + self._switch_to_hypothesis_provider = False with self._log_phase_statistics("generate"): self.generate_new_examples() # We normally run the targeting phase mixed in with the generate phase, # but if we've been asked to run it but not generation then we have to - # run it explciitly on its own here. + # run it explicitly on its own here. if Phase.generate not in self.settings.phases: self._current_phase = "target" self.optimise_targets() # ...and back to the primitive provider when shrinking. - self._switch_to_primitive_provider = True + self._switch_to_hypothesis_provider = True with self._log_phase_statistics("shrink"): self.shrink_interesting_examples() self.exit_with(ExitReason.finished) def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): - if self.settings.backend == "hypothesis" or self._switch_to_primitive_provider: - provider_cls = PrimitiveProvider - else: - mname, cname = AVAILABLE_PROVIDERS[self.settings.backend].rsplit(".", 1) - provider_cls = getattr(importlib.import_module(mname), cname) - + provider = ( + HypothesisProvider if self._switch_to_hypothesis_provider else self.provider + ) observer = observer or self.tree.new_observer() if self.settings.backend != "hypothesis": # replace with wants_datatree observer = DataObserver() @@ -963,7 +975,7 @@ def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): max_length=max_length, random=self.random, observer=observer, - provider=provider_cls, + provider=provider, ) def new_conjecture_data_for_buffer(self, buffer): diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 450175a2fe..699feb63a3 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -31,10 +31,15 @@ class PrngProvider(PrimitiveProvider): # A test-only implementation of the PrimitiveProvider interface, which uses - # a very simple PRNG to choose each value. Dumb but efficient, and entirely + # a very simple PRNG to choose each value. Dumb but efficient, and entirely # independent of our real backend + # this could easily be test_function as well, if there's an alternative to + # seeding the prng with the buffer? 
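# A rough, untested sketch (not part of this patch) of the "test_function"
# variant wondered about above: reuse PrngProvider's draw_* methods, which never
# touch the ConjectureData, and simply stop seeding from the per-test-case
# buffer. The class name and the "prng_fn" key are invented for illustration;
# the other names are already imported at the top of this test file.

class FunctionScopedPrngProvider(PrngProvider):
    lifetime = "test_function"

    def __init__(self, conjecturedata, /) -> None:
        # With lifetime = "test_function" the engine passes conjecturedata=None,
        # so skip PrngProvider.__init__ (which seeds from the buffer) and seed
        # from OS entropy once per test function instead.
        PrimitiveProvider.__init__(self, conjecturedata)
        self.prng = Random()

# Such a provider would be registered the same way as the others in this file,
# e.g. AVAILABLE_PROVIDERS["prng_fn"] = f"{__name__}.FunctionScopedPrngProvider"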
+ lifetime = "test_case" + def __init__(self, conjecturedata: "ConjectureData", /) -> None: + super().__init__(conjecturedata) self.prng = Random(conjecturedata.buffer or None) def draw_boolean( From 142afb9988a18fca6f9e91d5d30bb2b370a5991c Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 20:44:28 -0500 Subject: [PATCH 24/43] add track_redundant_inputs attr --- .../src/hypothesis/internal/conjecture/data.py | 16 ++++++++++++++++ .../src/hypothesis/internal/conjecture/engine.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 40c7ac44ad..53f2a8f9be 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -985,6 +985,22 @@ class PrimitiveProvider(abc.ABC): # Non-hypothesis providers probably want to set a lifetime of test_case. lifetime = "test_function" + # Whether values provided by this provider should be tracked by hypothesis + # such that we do not try the same input to the test function twice. Defaults + # to True. + # + # Providers may want to disable this if they are tracking redundant input + # themselves in a way that is not compatible with hypothesis, or in a way + # that is compatible but results in duplicated work if both were enabled. + # + # At the moment, providers which provide "symbolic" values a la + # hypothesis_crosshair may also want to disable this to prevent hypothesis' + # redundancy tracking from reifing values earlier than desired. (note for + # hypothesis devs: we may be able to improve this interaction in the future + # by updating DataTree at the end of the test case, when we can safely reify, + # as opposed to the middle. But this requires a significant refactoring.) 
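# As a small illustration (not part of this patch) of how a backend would use
# the attribute described above: a provider that already deduplicates its own
# inputs could opt out of the engine's DataTree tracking like so. The
# NoDedupProvider name is invented, and this attribute is removed again by a
# later patch in this series.

class NoDedupProvider(HypothesisProvider):
    # Draws exactly like the default backend, but asks the engine not to run
    # its DataTree-based redundancy tracking for these test cases.
    track_redundant_inputs = False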
+ track_redundant_inputs = True + def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: self._cd = conjecturedata diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 86a75bcc3a..d919e9f5ab 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -967,7 +967,7 @@ def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): HypothesisProvider if self._switch_to_hypothesis_provider else self.provider ) observer = observer or self.tree.new_observer() - if self.settings.backend != "hypothesis": # replace with wants_datatree + if not self.provider.track_redundant_inputs: observer = DataObserver() return ConjectureData( @@ -1117,7 +1117,7 @@ def kill_branch(self): prefix=buffer, max_length=max_length, observer=observer ) - if self.settings.backend == "hypothesis": # replace with wants_datatree + if self.provider.track_redundant_inputs: try: self.tree.simulate_test_function(dummy_data) except PreviouslyUnseenBehaviour: From c70795521296959c2fcf550979d7a0443f717cb1 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 21:24:19 -0500 Subject: [PATCH 25/43] prefer NotImplementedError to pass --- .../src/hypothesis/internal/conjecture/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 53f2a8f9be..c9709a2788 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1022,7 +1022,7 @@ def draw_boolean( forced: Optional[bool] = None, fake_forced: bool = False, ) -> bool: - pass + raise NotImplementedError @abc.abstractmethod def draw_integer( @@ -1036,7 +1036,7 @@ def draw_integer( forced: Optional[int] = None, fake_forced: bool = False, ) -> int: - pass + raise NotImplementedError @abc.abstractmethod def draw_float( @@ -1053,7 +1053,7 @@ def draw_float( forced: Optional[float] = None, fake_forced: bool = False, ) -> float: - pass + raise NotImplementedError @abc.abstractmethod def draw_string( @@ -1065,13 +1065,13 @@ def draw_string( forced: Optional[str] = None, fake_forced: bool = False, ) -> str: - pass + raise NotImplementedError @abc.abstractmethod def draw_bytes( self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False ) -> bytes: - pass + raise NotImplementedError class HypothesisProvider(PrimitiveProvider): From 46ed532d982d56fe13928a51ab7adf0526eab2d4 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:40:21 -0500 Subject: [PATCH 26/43] typing --- .../src/hypothesis/internal/conjecture/data.py | 11 +++++++++++ .../src/hypothesis/internal/conjecture/engine.py | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index c9709a2788..e06f8de830 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1094,6 +1094,7 @@ def draw_boolean( drawn that value randomly.""" # Note that this could also be implemented in terms of draw_integer(). + assert self._cd is not None # NB this function is vastly more complicated than it may seem reasonable # for it to be. 
This is because it is used in a lot of places and it's # important for it to shrink well, so it's worth the engineering effort. @@ -1203,6 +1204,8 @@ def draw_integer( forced: Optional[int] = None, fake_forced: bool = False, ) -> int: + assert self._cd is not None + if min_value is not None: shrink_towards = max(min_value, shrink_towards) if max_value is not None: @@ -1293,6 +1296,8 @@ def draw_float( smallest_nonzero_magnitude=smallest_nonzero_magnitude, ) + assert self._cd is not None + while True: self._cd.start_example(FLOAT_STRATEGY_DO_DRAW_LABEL) # If `forced in nasty_floats`, then `forced` was *probably* @@ -1348,6 +1353,7 @@ def draw_string( max_size = DRAW_STRING_DEFAULT_MAX_SIZE assert forced is None or min_size <= len(forced) <= max_size + assert self._cd is not None average_size = min( max(min_size * 2, min_size + 5), @@ -1403,6 +1409,7 @@ def draw_bytes( forced_i = int_from_bytes(forced) size = len(forced) + assert self._cd is not None return self._cd.draw_bits( 8 * size, forced=forced_i, fake_forced=fake_forced ).to_bytes(size, "big") @@ -1417,6 +1424,8 @@ def _draw_float( """ Helper for draw_float which draws a random 64-bit float. """ + assert self._cd is not None + if forced is not None: # sign_aware_lte(forced, -0.0) does not correctly handle the # math.nan case here. @@ -1436,6 +1445,7 @@ def _draw_float( def _draw_unbounded_integer( self, *, forced: Optional[int] = None, fake_forced: bool = False ) -> int: + assert self._cd is not None forced_i = None if forced is not None: # Using any bucket large enough to contain this integer would be a @@ -1480,6 +1490,7 @@ def _draw_bounded_integer( ) -> int: assert lower <= upper assert forced is None or lower <= forced <= upper + assert self._cd is not None if lower == upper: # Write a value even when this is trivial so that when a bound depends # on other values we don't suddenly disappear when the gap shrinks to diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index d919e9f5ab..ed7d05e2dd 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -16,6 +16,7 @@ from datetime import timedelta from enum import Enum from random import Random, getrandbits +from typing import Union import attr @@ -31,6 +32,7 @@ DataObserver, HypothesisProvider, Overrun, + PrimitiveProvider, Status, ) from hypothesis.internal.conjecture.datatree import DataTree, PreviouslyUnseenBehaviour @@ -113,7 +115,7 @@ class RunIsComplete(Exception): pass -def _get_provider(backend): +def _get_provider(backend: str) -> Union[type, PrimitiveProvider]: mname, cname = AVAILABLE_PROVIDERS[backend].rsplit(".", 1) provider_cls = getattr(importlib.import_module(mname), cname) if provider_cls.lifetime == "test_function": From 60915c3b2b309841a53acce4b3153da10fd82b7d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:40:29 -0500 Subject: [PATCH 27/43] fix backend default was causing parent settings to not be inherited properly --- hypothesis-python/src/hypothesis/_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/_settings.py b/hypothesis-python/src/hypothesis/_settings.py index e4d9db27e2..6910104cef 100644 --- a/hypothesis-python/src/hypothesis/_settings.py +++ b/hypothesis-python/src/hypothesis/_settings.py @@ -165,7 +165,7 @@ def __init__( suppress_health_check: Collection["HealthCheck"] = not_set, # type: ignore 
deadline: Union[int, float, datetime.timedelta, None] = not_set, # type: ignore print_blob: bool = not_set, # type: ignore - backend: str = "hypothesis", + backend: str = not_set, ) -> None: if parent is not None: check_type(settings, parent, "parent") From 2ac416a9696b6a6569fb9c5a475c95991bef96f0 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:40:45 -0500 Subject: [PATCH 28/43] make test easier to pass --- .../tests/conjecture/test_alt_backend.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 699feb63a3..0754e71bd8 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -161,10 +161,10 @@ def temp_register_backend(): "strategy", [ st.booleans(), - st.integers(0, 10), - st.floats(allow_nan=False), - st.text(max_size=5), - st.binary(max_size=5), + st.integers(0, 3), + st.floats(0, 1), + st.text(max_size=3), + st.binary(max_size=3), ], ids=repr, ) @@ -178,8 +178,7 @@ def test_find_with_backend_then_convert_to_buffer_shrink_and_replay(strategy): @given(strategy) def test(value): if isinstance(value, float): - # randomly generating 0 for floats is really unlikely - assert value not in [math.inf, -math.inf] + assert value >= 0.5 else: assert value From f409f7be97e32fdeff57476d5d993f8ad535a93b Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:40:51 -0500 Subject: [PATCH 29/43] add tests for backend attrs and shrinking --- .../tests/conjecture/test_alt_backend.py | 189 +++++++++++++++++- 1 file changed, 182 insertions(+), 7 deletions(-) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 0754e71bd8..81b40f09ce 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -18,11 +18,14 @@ from hypothesis import given, settings, strategies as st from hypothesis.database import InMemoryExampleDatabase +from hypothesis.errors import InvalidArgument +from hypothesis.internal.compat import int_to_bytes from hypothesis.internal.conjecture.data import ( AVAILABLE_PROVIDERS, ConjectureData, PrimitiveProvider, ) +from hypothesis.internal.conjecture.engine import ConjectureRunner from hypothesis.internal.floats import SIGNALING_NAN from hypothesis.internal.intervalsets import IntervalSet @@ -149,12 +152,12 @@ def draw_bytes( @contextmanager -def temp_register_backend(): +def temp_register_backend(name, cls): try: - AVAILABLE_PROVIDERS["prng"] = f"{__name__}.{PrngProvider.__name__}" + AVAILABLE_PROVIDERS[name] = f"{__name__}.{cls.__name__}" yield finally: - AVAILABLE_PROVIDERS.pop("prng") + AVAILABLE_PROVIDERS.pop(name) @pytest.mark.parametrize( @@ -172,7 +175,7 @@ def test_find_with_backend_then_convert_to_buffer_shrink_and_replay(strategy): db = InMemoryExampleDatabase() assert not db.data - with temp_register_backend(): + with temp_register_backend("prng", PrngProvider): @settings(database=db, backend="prng") @given(strategy) @@ -190,10 +193,182 @@ def test(value): assert buffers, db.data -def test_backend_can_shrink(): - with temp_register_backend(): +def test_backend_can_shrink_integers(): + with temp_register_backend("prng", PrngProvider): n = minimal( - st.integers(), lambda n: n >= 123456, settings=settings(backend="prng") + st.integers(), + lambda n: n >= 123456, + 
settings=settings(backend="prng", database=None), ) assert n == 123456 + + +def test_backend_can_shrink_bytes(): + with temp_register_backend("prng", PrngProvider): + b = minimal( + # this test doubles as coverage for popping draw_bytes ir nodes, + # and that path is only taken with fixed size for the moment. can + # be removed when we support variable length binary at the ir level. + st.binary(min_size=2, max_size=2), + lambda b: len(b) >= 2 and b[1] >= 10, + settings=settings(backend="prng", database=None), + ) + + assert b == int_to_bytes(10, size=2) + + +def test_backend_can_shrink_strings(): + with temp_register_backend("prng", PrngProvider): + s = minimal( + st.text(), + lambda s: len(s) >= 10, + settings=settings(backend="prng", database=None), + ) + + assert len(s) == 10 + + +def test_backend_can_shrink_booleans(): + with temp_register_backend("prng", PrngProvider): + b = minimal( + st.booleans(), lambda b: b, settings=settings(backend="prng", database=None) + ) + + assert b + + +def test_backend_can_shrink_floats(): + with temp_register_backend("prng", PrngProvider): + f = minimal( + st.floats(), + lambda f: f >= 100.5, + settings=settings(backend="prng", database=None), + ) + + assert f == 101.0 + + +# trivial provider for tests which don't care about drawn distributions. +class TrivialProvider(PrimitiveProvider): + def draw_integer(self, *args, **kwargs): + return 1 + + def draw_boolean(self, *args, **kwargs): + return True + + def draw_float(self, *args, **kwargs): + return 1.0 + + def draw_bytes(self, *args, **kwargs): + return b"" + + def draw_string(self, *args, **kwargs): + return "" + + +class InvalidLifetime(TrivialProvider): + + lifetime = "forever and a day!" + + +def test_invalid_lifetime(): + with temp_register_backend("invalid_lifetime", InvalidLifetime): + with pytest.raises(InvalidArgument): + ConjectureRunner( + lambda: True, settings=settings(backend="invalid_lifetime") + ) + + +function_lifetime_init_count = 0 + + +class LifetimeTestFunction(TrivialProvider): + lifetime = "test_function" + + def __init__(self, conjecturedata): + super().__init__(conjecturedata) + # hacky, but no easy alternative. + global function_lifetime_init_count + function_lifetime_init_count += 1 + + +def test_function_lifetime(): + with temp_register_backend("lifetime_function", LifetimeTestFunction): + + @given(st.integers()) + @settings(backend="lifetime_function") + def test_function(n): + pass + + assert function_lifetime_init_count == 0 + test_function() + assert function_lifetime_init_count == 1 + test_function() + assert function_lifetime_init_count == 2 + + +test_case_lifetime_init_count = 0 + + +class LifetimeTestCase(TrivialProvider): + lifetime = "test_case" + + def __init__(self, conjecturedata): + super().__init__(conjecturedata) + global test_case_lifetime_init_count + test_case_lifetime_init_count += 1 + + +def test_case_lifetime(): + test_function_count = 0 + + with temp_register_backend("lifetime_case", LifetimeTestCase): + + @given(st.integers()) + @settings(backend="lifetime_case") + def test_function(n): + nonlocal test_function_count + test_function_count += 1 + + assert test_case_lifetime_init_count == 0 + test_function() + + # we create a new provider each time we *try* to generate an input to the + # test function, but this could be filtered out, discarded as duplicate, + # etc. We also sometimes try predetermined inputs to the test function, + # such as the zero buffer, which does not entail creating providers. 
+ # These two facts combined mean that the number of inits could be + # anywhere reasonably close to the number of function calls. + assert ( + test_function_count - 10 + <= test_case_lifetime_init_count + <= test_function_count + 10 + ) + + +class TrackRedundant(TrivialProvider): + track_redundant_inputs = True + + +class NoTrackRedundant(TrivialProvider): + track_redundant_inputs = False + + +@pytest.mark.parametrize("track_redundancy", [True, False]) +def test_tracks_redundant_inputs(track_redundancy): + provider = TrackRedundant if track_redundancy else NoTrackRedundant + + def test_function(data): + data.draw_integer() + + with temp_register_backend("maybe_redundant", provider): + runner = ConjectureRunner( + test_function, settings=settings(backend="maybe_redundant") + ) + runner.run() + + if track_redundancy: + assert len(runner.tree.root.values) > 0 + else: + assert len(runner.tree.root.values) == 0 From 7c1b229212cf9a33a1bc8d2f129b2faeb25b9c20 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:40:56 -0500 Subject: [PATCH 30/43] reword comment --- .../src/hypothesis/internal/conjecture/engine.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index ed7d05e2dd..d0d77a149e 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -733,9 +733,13 @@ def generate_new_examples(self): ran_optimisations = False while self.should_generate_more(): - # There's no convenient way to track redundancy for custom backends - # yet. Will possibly improved when everything moves to the ir and we - # can use the DataTree for all backends? + # we'd love to use datatree to deduplicate inputs for the ir. + # Unfortunately its exhaustion logic is tighly coupled to the bounds + # of the HypothesisProvider, and this mismatch between what the + # datatree thinks is possible to generate and what non-hypothesis + # providers can actually generate can lead to e.g. infinite loops. + # We likely need a proper api for backends to communicate the size + # of their ir pools before we can use the datatree here. if self.settings.backend != "hypothesis": data = self.new_conjecture_data(prefix=b"", max_length=BUFFER_SIZE) self.test_function(data) From d9de115cda4e8dd67aa5114e0ef85b65ed6f1c54 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:43:05 -0500 Subject: [PATCH 31/43] remove track_redundant_inputs :( hopefully we can bring this back in the future --- .../hypothesis/internal/conjecture/data.py | 16 ----------- .../hypothesis/internal/conjecture/engine.py | 6 ++--- .../tests/conjecture/test_alt_backend.py | 27 ------------------- 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index e06f8de830..92cd64455e 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -985,22 +985,6 @@ class PrimitiveProvider(abc.ABC): # Non-hypothesis providers probably want to set a lifetime of test_case. lifetime = "test_function" - # Whether values provided by this provider should be tracked by hypothesis - # such that we do not try the same input to the test function twice. Defaults - # to True. 
- # - # Providers may want to disable this if they are tracking redundant input - # themselves in a way that is not compatible with hypothesis, or in a way - # that is compatible but results in duplicated work if both were enabled. - # - # At the moment, providers which provide "symbolic" values a la - # hypothesis_crosshair may also want to disable this to prevent hypothesis' - # redundancy tracking from reifing values earlier than desired. (note for - # hypothesis devs: we may be able to improve this interaction in the future - # by updating DataTree at the end of the test case, when we can safely reify, - # as opposed to the middle. But this requires a significant refactoring.) - track_redundant_inputs = True - def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: self._cd = conjecturedata diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index d0d77a149e..6b85a440ee 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -734,7 +734,7 @@ def generate_new_examples(self): while self.should_generate_more(): # we'd love to use datatree to deduplicate inputs for the ir. - # Unfortunately its exhaustion logic is tighly coupled to the bounds + # Unfortunately its exhaustion logic is tightly coupled to the bounds # of the HypothesisProvider, and this mismatch between what the # datatree thinks is possible to generate and what non-hypothesis # providers can actually generate can lead to e.g. infinite loops. @@ -973,7 +973,7 @@ def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None): HypothesisProvider if self._switch_to_hypothesis_provider else self.provider ) observer = observer or self.tree.new_observer() - if not self.provider.track_redundant_inputs: + if self.settings.backend != "hypothesis": observer = DataObserver() return ConjectureData( @@ -1123,7 +1123,7 @@ def kill_branch(self): prefix=buffer, max_length=max_length, observer=observer ) - if self.provider.track_redundant_inputs: + if self.settings.backend == "hypothesis": try: self.tree.simulate_test_function(dummy_data) except PreviouslyUnseenBehaviour: diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 81b40f09ce..7301c02188 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -345,30 +345,3 @@ def test_function(n): <= test_case_lifetime_init_count <= test_function_count + 10 ) - - -class TrackRedundant(TrivialProvider): - track_redundant_inputs = True - - -class NoTrackRedundant(TrivialProvider): - track_redundant_inputs = False - - -@pytest.mark.parametrize("track_redundancy", [True, False]) -def test_tracks_redundant_inputs(track_redundancy): - provider = TrackRedundant if track_redundancy else NoTrackRedundant - - def test_function(data): - data.draw_integer() - - with temp_register_backend("maybe_redundant", provider): - runner = ConjectureRunner( - test_function, settings=settings(backend="maybe_redundant") - ) - runner.run() - - if track_redundancy: - assert len(runner.tree.root.values) > 0 - else: - assert len(runner.tree.root.values) == 0 From 079b8eaa09cc9a856121abeeb4ff1ce4ef64671a Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 22:43:29 -0500 Subject: [PATCH 32/43] type ignore setting --- 
hypothesis-python/src/hypothesis/_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/_settings.py b/hypothesis-python/src/hypothesis/_settings.py index 6910104cef..47be269970 100644 --- a/hypothesis-python/src/hypothesis/_settings.py +++ b/hypothesis-python/src/hypothesis/_settings.py @@ -165,7 +165,7 @@ def __init__( suppress_health_check: Collection["HealthCheck"] = not_set, # type: ignore deadline: Union[int, float, datetime.timedelta, None] = not_set, # type: ignore print_blob: bool = not_set, # type: ignore - backend: str = not_set, + backend: str = not_set, # type: ignore ) -> None: if parent is not None: check_type(settings, parent, "parent") From 8329f955c78770362d79b6b5aebb09feab59f859 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Mon, 4 Mar 2024 23:34:13 -0500 Subject: [PATCH 33/43] reword comment again old explanation was mistaken --- .../src/hypothesis/internal/conjecture/engine.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 6b85a440ee..532431d2ab 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -733,13 +733,10 @@ def generate_new_examples(self): ran_optimisations = False while self.should_generate_more(): - # we'd love to use datatree to deduplicate inputs for the ir. - # Unfortunately its exhaustion logic is tightly coupled to the bounds - # of the HypothesisProvider, and this mismatch between what the - # datatree thinks is possible to generate and what non-hypothesis - # providers can actually generate can lead to e.g. infinite loops. - # We likely need a proper api for backends to communicate the size - # of their ir pools before we can use the datatree here. + # Unfortunately generate_novel_prefix still operates in terms of + # a buffer and uses HypothesisProvider as its backing provider, + # not whatever is specified by the backend. We can improve this + # once more things are on the ir. 
if self.settings.backend != "hypothesis": data = self.new_conjecture_data(prefix=b"", max_length=BUFFER_SIZE) self.test_function(data) From f868f00f3a33d5f4f70df7083a40bd552f0b4639 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Tue, 5 Mar 2024 00:20:20 -0500 Subject: [PATCH 34/43] update ghostwriter for backend --- .../tests/ghostwriter/recorded/hypothesis_module_magic.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt index 2667fa5546..2bfb4f1b83 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/hypothesis_module_magic.txt @@ -73,7 +73,7 @@ def test_fuzz_seed(seed: typing.Hashable) -> None: suppress_health_check=st.just(not_set), deadline=st.just(not_set), print_blob=st.just(not_set), - backend=st.just("hypothesis"), + backend=st.just(not_set), ) def test_fuzz_settings( parent: typing.Optional[hypothesis.settings], From eb0a7bca584ac475eec6939e9112214dc8fdd836 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Tue, 5 Mar 2024 00:23:47 -0500 Subject: [PATCH 35/43] lint --- hypothesis-python/src/hypothesis/internal/conjecture/engine.py | 3 ++- hypothesis-python/src/hypothesis/internal/conjecture/utils.py | 1 + hypothesis-python/tests/cover/test_health_checks.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 532431d2ab..b0cf812298 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -124,7 +124,8 @@ def _get_provider(backend: str) -> Union[type, PrimitiveProvider]: return provider_cls else: raise InvalidArgument( - f"invalid lifetime {provider_cls.lifetime} for provider {provider_cls.__name__}. Expected one of 'test_function', 'test_case'." + f"invalid lifetime {provider_cls.lifetime} for provider {provider_cls.__name__}. " + "Expected one of 'test_function', 'test_case'." ) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py index 7ce126f2f0..509c03ef71 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py @@ -169,6 +169,7 @@ def __init__(self, weights: Sequence[float], *, observe: bool = True): def sample( self, data: "ConjectureData", + *, forced: Optional[int] = None, fake_forced: bool = False, ) -> int: diff --git a/hypothesis-python/tests/cover/test_health_checks.py b/hypothesis-python/tests/cover/test_health_checks.py index a4165b715f..72bbb1d675 100644 --- a/hypothesis-python/tests/cover/test_health_checks.py +++ b/hypothesis-python/tests/cover/test_health_checks.py @@ -237,7 +237,7 @@ def test_does_not_trigger_health_check_on_simple_strategies(monkeypatch): # We need to make drawing data artificially slow in order to trigger this # effect. This isn't actually slow because time is fake in our CI, but # we need it to pretend to be. 
- def draw_bits(self, n, forced=None, fake_forced=False): + def draw_bits(self, n, *, forced=None, fake_forced=False): time.sleep(0.001) return existing_draw_bits(self, n, forced=forced, fake_forced=fake_forced) From 6cdf6830704711913470837092d2892331de0111 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Tue, 5 Mar 2024 00:51:27 -0500 Subject: [PATCH 36/43] did not mean to delete that --- hypothesis-python/src/hypothesis/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index e038f30565..402382c6aa 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -1119,7 +1119,7 @@ def run_engine(self): else: database_key = None - runner = ConjectureRunner( + runner = self._runner = ConjectureRunner( self._execute_once_for_engine, settings=self.settings, random=self.random, From f180bf3be56b452259f31dd6c10f834c544a8082 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Wed, 6 Mar 2024 16:53:13 -0500 Subject: [PATCH 37/43] use BUFFER_SIZE --- hypothesis-python/src/hypothesis/internal/conjecture/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 92cd64455e..6d80b902a7 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -1669,8 +1669,10 @@ def for_ir_tree( observer: Optional[DataObserver] = None, provider: Union[type, PrimitiveProvider] = HypothesisProvider, ) -> "ConjectureData": + from hypothesis.internal.conjecture.engine import BUFFER_SIZE + return cls( - 8 * 1024, + BUFFER_SIZE, b"", random=None, ir_tree_prefix=ir_tree_prefix, From 66782331c220660024861c8a69ad1569736940a4 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Wed, 6 Mar 2024 16:53:18 -0500 Subject: [PATCH 38/43] remove database=None --- hypothesis-python/docs/strategies.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/docs/strategies.rst b/hypothesis-python/docs/strategies.rst index c60bef9d3f..a5c07e0dae 100644 --- a/hypothesis-python/docs/strategies.rst +++ b/hypothesis-python/docs/strategies.rst @@ -233,7 +233,7 @@ a solver-backed test might look something like: from hypothesis import given, settings, strategies as st - @settings(backend="crosshair", database=None) + @settings(backend="crosshair") @given(st.integers()) def test_needs_solver(x): assert x != 123456789 From beac7df26b16a51fe93ff117bda7752de01fa7dd Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Fri, 8 Mar 2024 22:42:27 -0500 Subject: [PATCH 39/43] remove stale condition, that doesnt play well with new backends --- .../src/hypothesis/strategies/_internal/strategies.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py index 46d4005cdb..496f95bf58 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py @@ -1002,7 +1002,6 @@ def do_draw(self, data: ConjectureData) -> Ex: def do_filtered_draw(self, data): for i in range(3): - start_index = data.index data.start_example(FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL) value = data.draw(self.filtered_strategy) if self.condition(value): @@ -1012,10 +1011,6 @@ def 
do_filtered_draw(self, data): data.stop_example(discard=True) if i == 0: data.events[f"Retried draw from {self!r} to satisfy filter"] = "" - # This is to guard against the case where we consume no data. - # As long as we consume data, we'll eventually pass or raise. - # But if we don't this could be an infinite loop. - assume(data.index > start_index) return filter_not_satisfied From d94b801e96e8cc07c7fabac21f311473993960b5 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 9 Mar 2024 00:17:11 -0500 Subject: [PATCH 40/43] remove prng provider seeding I'm not sure this was doing anything since the buffer should be empty always --- hypothesis-python/tests/conjecture/test_alt_backend.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index 7301c02188..e55c6aea3a 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -37,13 +37,9 @@ class PrngProvider(PrimitiveProvider): # a very simple PRNG to choose each value. Dumb but efficient, and entirely # independent of our real backend - # this could easily be test_function as well, if there's an alternative to - # seeding the prng with the buffer? - lifetime = "test_case" - def __init__(self, conjecturedata: "ConjectureData", /) -> None: super().__init__(conjecturedata) - self.prng = Random(conjecturedata.buffer or None) + self.prng = Random() def draw_boolean( self, From ec3ef6cb4b1df7ecb80f4fcf9bed2341c36c47fd Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 9 Mar 2024 00:17:51 -0500 Subject: [PATCH 41/43] format --- .../src/hypothesis/strategies/_internal/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py index 496f95bf58..83b0e6b059 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py @@ -28,7 +28,7 @@ ) from hypothesis._settings import HealthCheck, Phase, Verbosity, settings -from hypothesis.control import _current_build_context, assume +from hypothesis.control import _current_build_context from hypothesis.errors import ( HypothesisException, HypothesisWarning, From 7e1a86cb3e05f8dea62dd6baceb9f14b21048dfd Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Fri, 8 Mar 2024 23:35:55 -0800 Subject: [PATCH 42/43] Small cleanups --- hypothesis-python/docs/strategies.rst | 2 +- hypothesis-python/setup.py | 3 +-- hypothesis-python/src/hypothesis/_settings.py | 9 +++++++-- hypothesis-python/tests/conjecture/test_alt_backend.py | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hypothesis-python/docs/strategies.rst b/hypothesis-python/docs/strategies.rst index a5c07e0dae..8670b08625 100644 --- a/hypothesis-python/docs/strategies.rst +++ b/hypothesis-python/docs/strategies.rst @@ -224,7 +224,7 @@ See :issue:`3086` for details, e.g. 
if you're interested in writing your own bac what that should eventually look like, and we're likely to make regular breaking changes for some time to come) -Using the prototype :pypi:`crosshair-tool` backend `via this schemathesis +Using the prototype :pypi:`crosshair-tool` backend `via this plugin `__, a solver-backed test might look something like: diff --git a/hypothesis-python/setup.py b/hypothesis-python/setup.py index cf29c2732a..494a305321 100644 --- a/hypothesis-python/setup.py +++ b/hypothesis-python/setup.py @@ -60,6 +60,7 @@ def local_file(name): "pytest": ["pytest>=4.6"], "dpcontracts": ["dpcontracts>=0.4"], "redis": ["redis>=3.0.0"], + "crosshair": ["hypothesis-crosshair>=0.0.1", "crosshair-tool>=0.0.50"], # zoneinfo is an odd one: every dependency is conditional, because they're # only necessary on old versions of Python or Windows systems or emscripten. "zoneinfo": [ @@ -71,8 +72,6 @@ def local_file(name): # We also leave the choice of timezone library to the user, since it # might be zoneinfo or pytz depending on version and configuration. "django": ["django>=3.2"], - # TODO: https://github.com/pschanely/hypothesis-crosshair/ extra once released - # "crosshair": ["hypothesis-crosshair-tool >= ???"], } extras["all"] = sorted(set(sum(extras.values(), []))) diff --git a/hypothesis-python/src/hypothesis/_settings.py b/hypothesis-python/src/hypothesis/_settings.py index 47be269970..061292e9bd 100644 --- a/hypothesis-python/src/hypothesis/_settings.py +++ b/hypothesis-python/src/hypothesis/_settings.py @@ -718,8 +718,13 @@ def _backend_validator(value): from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS if value not in AVAILABLE_PROVIDERS: - msg = f"Invalid backend, {value!r}. Valid options: {sorted(AVAILABLE_PROVIDERS)!r}" - raise InvalidArgument(msg) + if value == "crosshair": # pragma: no cover + install = '`pip install "hypothesis[crosshair]"` and try again.' + raise InvalidArgument(f"backend={value!r} is not available. {install}") + raise InvalidArgument( + f"backend={value!r} is not available - maybe you need to install a plugin?" + f"\n Installed backends: {sorted(AVAILABLE_PROVIDERS)!r}" + ) return value diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index e55c6aea3a..0eda6e93e4 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -322,7 +322,7 @@ def test_case_lifetime(): with temp_register_backend("lifetime_case", LifetimeTestCase): @given(st.integers()) - @settings(backend="lifetime_case") + @settings(backend="lifetime_case", database=InMemoryExampleDatabase()) def test_function(n): nonlocal test_function_count test_function_count += 1 From 732291329441cdee87ef706f15079b5dea425981 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Sat, 9 Mar 2024 13:41:06 -0500 Subject: [PATCH 43/43] correct lifetime recommendation --- hypothesis-python/src/hypothesis/internal/conjecture/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 6d80b902a7..486709edbb 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -982,7 +982,7 @@ class PrimitiveProvider(abc.ABC): # time hypothesis tries to generate a new input to the test function. 
This # lifetime can access the passed ConjectureData object. # - # Non-hypothesis providers probably want to set a lifetime of test_case. + # Non-hypothesis providers probably want to set a lifetime of test_function. lifetime = "test_function" def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
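Putting the pieces of this series together: a third-party backend subclasses ``PrimitiveProvider``, keeps the (default, and per the corrected comment above, recommended) ``lifetime = "test_function"``, and is selected with ``settings(backend=...)`` once it is registered under a name known to ``AVAILABLE_PROVIDERS``. The sketch below is illustrative only, modelled on the ``PrngProvider`` used in ``tests/conjecture/test_alt_backend.py``: the ``draw_*`` signatures are abbreviated or assumed, the remaining abstract ``draw_*`` methods are reduced to permissive stubs, and registering by assigning into ``AVAILABLE_PROVIDERS`` is an assumption rather than a stable API (the test suite wraps this in a ``temp_register_backend()`` helper). It is a sketch of the experimental interface, not a definitive implementation.

.. code-block:: python

    # Illustrative sketch only: the backend interface is experimental and the
    # exact draw_* signatures are abbreviated / assumed here.
    from random import Random

    from hypothesis import given, settings, strategies as st
    from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS, PrimitiveProvider


    class CoinFlipProvider(PrimitiveProvider):
        # Per the corrected comment in this series, third-party providers
        # probably want one instance per test function, not per test case.
        lifetime = "test_function"

        def __init__(self, conjecturedata, /):
            super().__init__(conjecturedata)
            self.prng = Random()  # independent of Hypothesis' own byte stream

        def draw_boolean(self, p=0.5, *, forced=None, fake_forced=False):
            return forced if forced is not None else self.prng.random() < p

        def draw_integer(self, min_value=None, max_value=None, **kwargs):
            if kwargs.get("forced") is not None:
                return kwargs["forced"]
            lo = -(2**64) if min_value is None else min_value
            hi = 2**64 if max_value is None else max_value
            return self.prng.randint(lo, hi)

        # The real ABC also requires draw_float, draw_string, and draw_bytes;
        # these stubs ignore most constraints to keep the sketch short.
        def draw_float(self, *args, **kwargs):
            return self.prng.random()

        def draw_string(self, intervals, *args, **kwargs):
            return "".join(
                chr(intervals[self.prng.randrange(len(intervals))]) for _ in range(5)
            )

        def draw_bytes(self, size, *args, **kwargs):
            return bytes(self.prng.getrandbits(8) for _ in range(size))


    # Assumed registration mechanism: the mapping is internal and unstable, and
    # the test suite uses a temp_register_backend() helper instead.
    AVAILABLE_PROVIDERS["coinflip"] = f"{__name__}.CoinFlipProvider"


    @settings(backend="coinflip")
    @given(st.integers(0, 10))
    def test_runs_on_custom_backend(n):
        assert 0 <= n <= 10

With a ``test_function`` lifetime the provider is constructed once per test function and ``conjecturedata`` may be ``None``, which is why the sketch seeds its own ``Random()`` rather than reading from the buffer, mirroring the change made to ``PrngProvider`` in this series.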