Merge pull request #2049 from Zac-HD/array-warning

Detect and warn on float truncation in arrays
HypothesisWorks · Jul 28, 2019 · 5896334 · 5896334
2 parents 39ca4ad + 1a3cc22
commit 5896334
Show file tree

Hide file tree

Showing 5 changed files with 89 additions and 36 deletions.
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,11 @@
+RELEASE_TYPE: minor
+
+This release makes :func:`~hypothesis.extra.numpy.arrays` more pedantic about
+``elements`` strategies whose values cannot be exactly represented as array elements.
+
+In practice, you will see new warnings if you were using a ``float16`` or
+``float32`` dtype without passing :func:`~hypothesis.strategies.floats` the
+``width=16`` or ``width=32`` arguments respectively.
+
+The previous behaviour could lead to silent truncation, and thus some elements
+being equal to an explicitly excluded bound (:issue:`1899`).
diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py
@@ -133,42 +133,18 @@ def __init__(self, element_strategy, shape, dtype, fill, unique):
         self.element_strategy = element_strategy
         self.unique = unique
 
-        # Used by self.insert_element to check that the value can be stored
-        # in the array without e.g. overflowing.  See issues #1385 and #1591.
-        if dtype.kind in (u"i", u"u"):
-            self.check_cast = lambda x: np.can_cast(x, self.dtype, "safe")
-        elif dtype.kind == u"f" and dtype.itemsize == 2:
-            max_f2 = (2.0 - 2 ** -10) * 2 ** 15
-            self.check_cast = lambda x: (not np.isfinite(x)) or (-max_f2 <= x <= max_f2)
-        elif dtype.kind == u"f" and dtype.itemsize == 4:
-            max_f4 = (2.0 - 2 ** -23) * 2 ** 127
-            self.check_cast = lambda x: (not np.isfinite(x)) or (-max_f4 <= x <= max_f4)
-        elif dtype.kind == u"c" and dtype.itemsize == 8:
-            max_f4 = (2.0 - 2 ** -23) * 2 ** 127
-            self.check_cast = lambda x: (not np.isfinite(x)) or (
-                -max_f4 <= x.real <= max_f4 and -max_f4 <= x.imag <= max_f4
-            )
-        elif dtype.kind == u"U":
-            length = dtype.itemsize // 4
-            self.check_cast = lambda x: len(x) <= length and u"\0" not in x[length:]
-        elif dtype.kind in (u"S", u"a"):
-            self.check_cast = (
-                lambda x: len(x) <= dtype.itemsize and b"\0" not in x[dtype.itemsize :]
-            )
-        else:
-            self.check_cast = lambda x: True
-
     def set_element(self, data, result, idx, strategy=None):
         strategy = strategy or self.element_strategy
         val = data.draw(strategy)
         result[idx] = val
-        if self._report_overflow and not self.check_cast(val):
+        if self._report_overflow and val != result[idx] and val == val:
             note_deprecation(
                 "Generated array element %r from %r cannot be represented as "
-                "dtype %r - instead it becomes %r .  Consider using a more "
-                "precise strategy, as this will be an error in a future "
-                "version." % (val, strategy, self.dtype, result[idx]),
-                since="2018-10-25",
+                "dtype %r - instead it becomes %r (type %r).  Consider using a more "
+                "precise strategy, for example passing the `width` argument to "
+                "`floats()`, as this will be an error in a future version."
+                % (val, strategy, self.dtype, result[idx], type(result[idx])),
+                since="RELEASEDAY",
             )
             # Because the message includes the value of the generated element,
             # it would be easy to spam users with thousands of warnings.
@@ -180,7 +156,8 @@ def do_draw(self, data):
             return np.zeros(dtype=self.dtype, shape=self.shape)
 
         # Reset this flag for each test case to emit warnings from set_element
-        self._report_overflow = True
+        # Skip the check for object or void (multi-element) dtypes
+        self._report_overflow = self.dtype.kind not in ("O", "V")
 
         # This could legitimately be a np.empty, but the performance gains for
         # that would be so marginal that there's really not much point risking

diff --git a/hypothesis-python/tests/cover/test_numerics.py b/hypothesis-python/tests/cover/test_numerics.py
@@ -18,15 +18,68 @@
 from __future__ import absolute_import, division, print_function
 
 import decimal
+from math import copysign
 
 import pytest
 
 from hypothesis import assume, given, reject, settings
-from hypothesis.errors import InvalidArgument
-from hypothesis.strategies import data, decimals, fractions, integers, none, tuples
+from hypothesis.errors import HypothesisDeprecationWarning, InvalidArgument
+from hypothesis.internal.floats import next_down
+from hypothesis.strategies import (
+    booleans,
+    data,
+    decimals,
+    floats,
+    fractions,
+    integers,
+    none,
+    tuples,
+)
 from tests.common.debug import find_any
 
 
+@given(data())
+def test_fuzz_floats_bounds(data):  # fuzz floats() with random bounds and exclude_min/exclude_max flags
+    bound = none() | floats(allow_nan=False)  # each bound is either None or a non-NaN float
+    low, high = data.draw(tuples(bound, bound), label="low, high")
+    if low is not None and high is not None and low > high:
+        low, high = high, low  # swap so that low <= high whenever both bounds are given
+    exmin = (  # False unless low is set and is not +inf; only then is the flag drawn
+        low is not None
+        and low != float("inf")
+        and data.draw(booleans(), label="exclude_min")
+    )
+    exmax = (  # False unless high is set and is not -inf; only then is the flag drawn
+        high is not None
+        and high != float("-inf")
+        and data.draw(booleans(), label="exclude_max")
+    )
+    try:
+        val = data.draw(
+            floats(low, high, exclude_min=exmin, exclude_max=exmax), label="value"
+        )
+        assume(val)  # discard falsy draws (0.0 / -0.0): positive/negative zero is an issue
+    except (InvalidArgument, HypothesisDeprecationWarning):  # NOTE(review): presumably floats() rejecting the bounds; catching the warning assumes warnings are raised as errors - confirm
+        assert (  # a rejection is only acceptable in one of the three situations below
+            (exmin and exmax and low == next_down(high))  # both ends excluded and low == next_down(high)
+            or (low == high and (exmin or exmax))  # equal bounds with at least one end excluded
+            or (  # low is +0.0 and high is -0.0 (compare equal, signs differ)
+                low == high == 0
+                and copysign(1.0, low) == 1
+                and copysign(1.0, high) == -1
+            )
+        )
+        reject()  # no floats in required range, so abandon this example
+    if low is not None:
+        assert low <= val  # drawn value respects the lower bound
+    if high is not None:
+        assert val <= high  # drawn value respects the upper bound
+    if exmin:
+        assert low != val  # an excluded lower bound must never be generated
+    if exmax:
+        assert high != val  # an excluded upper bound must never be generated
+
+
 @given(data())
 def test_fuzz_fractions_bounds(data):
     denom = data.draw(none() | integers(1, 100), label="denominator")

diff --git a/hypothesis-python/tests/numpy/test_gen_data.py b/hypothesis-python/tests/numpy/test_gen_data.py
@@ -268,7 +268,7 @@ def test_can_cast_for_scalars(data):
     # combinations will result in an error if numpy is not able to cast them.
     dt_elements = np.dtype(data.draw(st.sampled_from(["bool", "<i2", ">i2"])))
     dt_desired = np.dtype(
-        data.draw(st.sampled_from(["<i2", ">i2", "float16", "float32", "float64"]))
+        data.draw(st.sampled_from(["<i2", ">i2", "float32", "float64"]))
     )
     result = data.draw(
         nps.arrays(dtype=dt_desired, elements=nps.from_dtype(dt_elements), shape=())
@@ -283,7 +283,7 @@ def test_can_cast_for_arrays(data):
     # combinations will result in an error if numpy is not able to cast them.
     dt_elements = np.dtype(data.draw(st.sampled_from(["bool", "<i2", ">i2"])))
     dt_desired = np.dtype(
-        data.draw(st.sampled_from(["<i2", ">i2", "float16", "float32", "float64"]))
+        data.draw(st.sampled_from(["<i2", ">i2", "float32", "float64"]))
     )
     result = data.draw(
         nps.arrays(

diff --git a/hypothesis-python/tests/numpy/test_narrow_floats.py b/hypothesis-python/tests/numpy/test_narrow_floats.py
@@ -18,12 +18,24 @@
 from __future__ import absolute_import, division, print_function
 
 import numpy as np
+import pytest
 
 from hypothesis import given
-from hypothesis.extra.numpy import from_dtype, integer_dtypes
+from hypothesis.extra.numpy import arrays, from_dtype, integer_dtypes
 from hypothesis.strategies import data, floats, integers
 
 
+@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
+@pytest.mark.parametrize("low", [-2.0, -1.0, 0.0, 1.0])
+@given(data())
+def test_bad_float_exclude_min_in_array(dtype, low, data):  # an excluded lower bound must not appear in a drawn array
+    elements = floats(  # floats in [low, low + 1] with low itself excluded
+        low, low + 1, exclude_min=True, width=np.dtype(dtype).itemsize * 8  # width in bits = dtype size in bytes * 8
+    )
+    x = data.draw(arrays(dtype, shape=(1,), elements=elements), label="x")  # one-element array of the parametrized dtype
+    assert np.all(low < x)  # strictly greater: the element must not equal the excluded bound
+
+
 @given(floats(width=32))
 def test_float32_exactly_representable(x):
     clipped = np.dtype("float32").type(x)