Skip to content

Commit

Permalink
Merge pull request #2049 from Zac-HD/array-warning
Browse files Browse the repository at this point in the history
Detect and warn on float truncation in arrays
  • Loading branch information
Zac-HD committed Jul 28, 2019
2 parents 39ca4ad + 1a3cc22 commit 5896334
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 36 deletions.
11 changes: 11 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
RELEASE_TYPE: minor

This release makes :func:`~hypothesis.extra.numpy.arrays` more pedantic about
``elements`` strategies that generate values which cannot be exactly
represented as elements of the array's dtype.

In practice, you will see new warnings if you were using a ``float16`` or
``float32`` dtype without passing :func:`~hypothesis.strategies.floats` the
``width=16`` or ``width=32`` arguments respectively.

The previous behaviour could lead to silent truncation, and thus some elements
being equal to an explicitly excluded bound (:issue:`1899`).
39 changes: 8 additions & 31 deletions hypothesis-python/src/hypothesis/extra/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,42 +133,18 @@ def __init__(self, element_strategy, shape, dtype, fill, unique):
self.element_strategy = element_strategy
self.unique = unique

# Used by self.insert_element to check that the value can be stored
# in the array without e.g. overflowing. See issues #1385 and #1591.
if dtype.kind in (u"i", u"u"):
self.check_cast = lambda x: np.can_cast(x, self.dtype, "safe")
elif dtype.kind == u"f" and dtype.itemsize == 2:
max_f2 = (2.0 - 2 ** -10) * 2 ** 15
self.check_cast = lambda x: (not np.isfinite(x)) or (-max_f2 <= x <= max_f2)
elif dtype.kind == u"f" and dtype.itemsize == 4:
max_f4 = (2.0 - 2 ** -23) * 2 ** 127
self.check_cast = lambda x: (not np.isfinite(x)) or (-max_f4 <= x <= max_f4)
elif dtype.kind == u"c" and dtype.itemsize == 8:
max_f4 = (2.0 - 2 ** -23) * 2 ** 127
self.check_cast = lambda x: (not np.isfinite(x)) or (
-max_f4 <= x.real <= max_f4 and -max_f4 <= x.imag <= max_f4
)
elif dtype.kind == u"U":
length = dtype.itemsize // 4
self.check_cast = lambda x: len(x) <= length and u"\0" not in x[length:]
elif dtype.kind in (u"S", u"a"):
self.check_cast = (
lambda x: len(x) <= dtype.itemsize and b"\0" not in x[dtype.itemsize :]
)
else:
self.check_cast = lambda x: True

def set_element(self, data, result, idx, strategy=None):
strategy = strategy or self.element_strategy
val = data.draw(strategy)
result[idx] = val
if self._report_overflow and not self.check_cast(val):
if self._report_overflow and val != result[idx] and val == val:
note_deprecation(
"Generated array element %r from %r cannot be represented as "
"dtype %r - instead it becomes %r . Consider using a more "
"precise strategy, as this will be an error in a future "
"version." % (val, strategy, self.dtype, result[idx]),
since="2018-10-25",
"dtype %r - instead it becomes %r (type %r). Consider using a more "
"precise strategy, for example passing the `width` argument to "
"`floats()`, as this will be an error in a future version."
% (val, strategy, self.dtype, result[idx], type(result[idx])),
since="RELEASEDAY",
)
# Because the message includes the value of the generated element,
# it would be easy to spam users with thousands of warnings.
Expand All @@ -180,7 +156,8 @@ def do_draw(self, data):
return np.zeros(dtype=self.dtype, shape=self.shape)

# Reset this flag for each test case to emit warnings from set_element
self._report_overflow = True
# Skip the check for object or void (multi-element) dtypes
self._report_overflow = self.dtype.kind not in ("O", "V")

# This could legitimately be a np.empty, but the performance gains for
# that would be so marginal that there's really not much point risking
Expand Down
57 changes: 55 additions & 2 deletions hypothesis-python/tests/cover/test_numerics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,68 @@
from __future__ import absolute_import, division, print_function

import decimal
from math import copysign

import pytest

from hypothesis import assume, given, reject, settings
from hypothesis.errors import InvalidArgument
from hypothesis.strategies import data, decimals, fractions, integers, none, tuples
from hypothesis.errors import HypothesisDeprecationWarning, InvalidArgument
from hypothesis.internal.floats import next_down
from hypothesis.strategies import (
booleans,
data,
decimals,
floats,
fractions,
integers,
none,
tuples,
)
from tests.common.debug import find_any


@given(data())
def test_fuzz_floats_bounds(data):
    """Fuzz ``floats()`` over arbitrary bounds and exclusion flags.

    Any value that is drawn must lie within the (possibly absent) bounds
    and must differ from a bound that was marked as excluded.  When the
    strategy refuses the arguments instead, the bounds must match one of
    the configurations listed in the ``except`` branch below.
    """
    endpoint = none() | floats(allow_nan=False)
    low, high = data.draw(tuples(endpoint, endpoint), label="low, high")
    if not (low is None or high is None) and low > high:
        # Normalise so that the lower bound never exceeds the upper bound.
        low, high = high, low

    # An exclusion flag is only drawn for a bound that is present and is
    # not the infinity on its own side (+inf for low, -inf for high).
    exmin = False
    if low is not None and low != float("inf"):
        exmin = data.draw(booleans(), label="exclude_min")
    exmax = False
    if high is not None and high != float("-inf"):
        exmax = data.draw(booleans(), label="exclude_max")

    try:
        strategy = floats(low, high, exclude_min=exmin, exclude_max=exmax)
        val = data.draw(strategy, label="value")
        assume(val)  # positive/negative zero is an issue
    except (InvalidArgument, HypothesisDeprecationWarning):
        # The arguments were rejected; check that this is one of the
        # expected "no floats in range" situations before discarding.
        adjacent_and_both_open = exmin and exmax and low == next_down(high)
        single_point_excluded = low == high and (exmin or exmax)
        reversed_signed_zeros = (
            low == high == 0
            and copysign(1.0, low) == 1
            and copysign(1.0, high) == -1
        )
        assert (
            adjacent_and_both_open
            or single_point_excluded
            or reversed_signed_zeros
        )
        reject()  # no floats in required range

    assert low is None or low <= val
    assert high is None or val <= high
    assert not exmin or low != val
    assert not exmax or high != val


@given(data())
def test_fuzz_fractions_bounds(data):
denom = data.draw(none() | integers(1, 100), label="denominator")
Expand Down
4 changes: 2 additions & 2 deletions hypothesis-python/tests/numpy/test_gen_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def test_can_cast_for_scalars(data):
# combinations will result in an error if numpy is not able to cast them.
dt_elements = np.dtype(data.draw(st.sampled_from(["bool", "<i2", ">i2"])))
dt_desired = np.dtype(
data.draw(st.sampled_from(["<i2", ">i2", "float16", "float32", "float64"]))
data.draw(st.sampled_from(["<i2", ">i2", "float32", "float64"]))
)
result = data.draw(
nps.arrays(dtype=dt_desired, elements=nps.from_dtype(dt_elements), shape=())
Expand All @@ -283,7 +283,7 @@ def test_can_cast_for_arrays(data):
# combinations will result in an error if numpy is not able to cast them.
dt_elements = np.dtype(data.draw(st.sampled_from(["bool", "<i2", ">i2"])))
dt_desired = np.dtype(
data.draw(st.sampled_from(["<i2", ">i2", "float16", "float32", "float64"]))
data.draw(st.sampled_from(["<i2", ">i2", "float32", "float64"]))
)
result = data.draw(
nps.arrays(
Expand Down
14 changes: 13 additions & 1 deletion hypothesis-python/tests/numpy/test_narrow_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,24 @@
from __future__ import absolute_import, division, print_function

import numpy as np
import pytest

from hypothesis import given
from hypothesis.extra.numpy import from_dtype, integer_dtypes
from hypothesis.extra.numpy import arrays, from_dtype, integer_dtypes
from hypothesis.strategies import data, floats, integers


@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
@pytest.mark.parametrize("low", [-2.0, -1.0, 0.0, 1.0])
@given(data())
def test_bad_float_exclude_min_in_array(dtype, low, data):
    """Check an excluded lower bound stays excluded once stored in an array.

    The element strategy is given a ``width`` matching the dtype's size in
    bits, and every element of the drawn one-element array must be
    strictly greater than ``low``.
    """
    bit_width = 8 * np.dtype(dtype).itemsize
    element_strategy = floats(low, low + 1, exclude_min=True, width=bit_width)
    array_strategy = arrays(dtype, shape=(1,), elements=element_strategy)
    x = data.draw(array_strategy, label="x")
    assert (x > low).all()


@given(floats(width=32))
def test_float32_exactly_representable(x):
clipped = np.dtype("float32").type(x)
Expand Down

0 comments on commit 5896334

Please sign in to comment.