# coding=utf-8
#
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Most of this work is copyright (C) 2013-2019 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER
from __future__ import absolute_import, division, print_function

from hypothesis.internal.compat import hrange
from hypothesis.internal.conjecture.shrinking.common import Shrinker, find_integer
"""
This module implements a length minimizer for sequences.
That is, given some sequence of elements satisfying some predicate, it tries to
find a strictly shorter one satisfying the same predicate.
Doing so perfectly is provably exponential. This only implements a linear time
worst case algorithm which guarantees certain minimality properties of the
fixed point.
"""


class Length(Shrinker):
    """Attempts to find a smaller sequence satisfying f. Will only perform
    linearly many evaluations, and does not loop to a fixed point.

    Guarantees made at a fixed point:

    1. No individual element may be deleted.
    2. No *adjacent* pair of elements may be deleted.
    """

    def make_immutable(self, value):
        return tuple(value)

    def short_circuit(self):
        return self.consider(()) or len(self.current) <= 1

    def left_is_better(self, left, right):
        return len(left) < len(right)

    def run_step(self):
        # Try to delete as many elements as possible from the sequence, trying
        # each element no more than once.

        # We convert the sequence to a set of indices. This allows us to more
        # easily do book-keeping around which elements we've tried removing.
        initial = self.current
        indices = list(hrange(len(self.current)))

        # The set of indices that we have not yet removed (either because
        # we have not yet tried to remove them or because we tried and
        # failed).
        current_subset = set(indices)

        # The set of indices in current_subset that we have not yet tried
        # to remove.
        candidates_for_removal = set(current_subset)

        def consider_set(keep):
            """Try replacing current_subset with current_subset & keep."""
            keep = keep & current_subset
            to_remove = current_subset - keep

            # Once we've tried and failed to delete an element we never
            # attempt to delete it again in the current pass. This can cause
            # us to skip shrinks that would work, but that doesn't matter -
            # if this pass succeeded then it will run again at some point,
            # so those will be picked up later.
            if not to_remove.issubset(candidates_for_removal):
                return False

            if self.consider([v for i, v in enumerate(initial) if i in keep]):
                current_subset.intersection_update(keep)
                return True
            return False
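
        # Hypothetical illustration: with current_subset == {0, 1, 2, 3},
        # consider_set({0, 3}) asks the predicate about the subsequence at
        # indices 0 and 3; if it passes, current_subset becomes {0, 3}, and
        # indices 1 and 2 are gone for the rest of the pass.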

        # We iterate over the indices in random order. This is because
        # deletions towards the end are more likely to work, while deletions
        # from the beginning are more likely to have higher impact. In
        # addition there tend to be large "dead" regions where nothing can be
        # deleted, and by proceeding in random order we don't have long gaps
        # in those where we make no progress.
        #
        # Note that this may be strictly more expensive than iterating from
        # left to right or right to left. The cost of find_integer, say f, is
        # concave. When deleting n elements starting from the left we pay f(n)
        # invocations, but when starting from the middle we pay 2 f(n / 2)
        # > f(n) invocations. In this case we are prioritising making progress
        # over a possibly strictly lower cost for two reasons: Firstly, when
        # n is small we just do linear scans anyway so this doesn't actually
        # matter, and secondly because successful deletions will tend to
        # speed up the test function, so even when we make more test
        # function calls we may still win on time.
        #
        # It's also very frustrating watching the shrinker repeatedly fail
        # to delete, so there's a psychological benefit to prioritising
        # progress over cost.
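        #
        # As a rough illustration (assuming f(n) is about 2 * log2(n) calls,
        # since find_integer probes exponentially and then binary-searches):
        # deleting a run of 8 elements from its left edge costs roughly
        # f(8) = 6 calls, while hitting it in the middle and growing both
        # ways costs roughly 2 * f(4) = 8 calls.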

        self.random.shuffle(indices)

        for i in indices:
            candidates_for_removal &= current_subset
            if not candidates_for_removal:
                break

            # We have already processed this index, either because it was
            # bulk removed or because it is the end point of a region that
            # was.
            if i not in candidates_for_removal:
                continue

            # Note that we do not update candidates_for_removal until we've
            # actually tried removing them. This is because our consider_set
            # predicate checks whether we've previously tried deleting them,
            # so removing them here would result in wrong checks!

            # We now try to delete a region around i. We start by trying to
            # delete a region starting with i, i.e. [i, j) for some j > i.
            to_right = find_integer(
                lambda n: i + n <= len(initial)
                and consider_set(current_subset - set(hrange(i, i + n)))
            )

            # If that succeeded we're in a deletable region. It's unlikely
            # that we happened to pick the starting index of that region, so
            # we try to extend it to the left too.
            if to_right > 0:
                to_left = find_integer(
                    lambda n: i - n >= 0
                    and consider_set(current_subset - set(hrange(i - n, i)))
                )

                # find_integer always tries at least n + 1 when it returns n.
                # This means that we've tried deleting i - (to_left + 1) and
                # failed to do so, so we can remove it from our candidates for
                # deletion.
                candidates_for_removal.discard(i - to_left - 1)

            # We've now tried deleting i so remove it.
            candidates_for_removal.discard(i)

            # As per the comment above we've also tried deleting one past the
            # end of the region, so we remove that from the candidate set too.
            candidates_for_removal.discard(i + to_right)
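
        # Hypothetical end-to-end trace: with initial == (0, 1, 2, 3, 4) and a
        # predicate requiring that 4 be present, visiting i == 1 first grows
        # to_right to 3 (indices 1-3 all delete), then to_left to 1 (index 0
        # deletes too), leaving current_subset == {4} and shrinking the value
        # to (4,) in a single pass.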