/
length.py
156 lines (127 loc) · 6.59 KB
/
length.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# coding=utf-8
#
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Most of this work is copyright (C) 2013-2019 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER
from __future__ import absolute_import, division, print_function
from hypothesis.internal.compat import hrange
from hypothesis.internal.conjecture.shrinking.common import Shrinker, find_integer
"""
This module implements a length minimizer for sequences.
That is, given some sequence of elements satisfying some predicate, it tries to
find a strictly shorter one satisfying the same predicate.
Doing so perfectly is provably exponential. This only implements a linear time
worst case algorithm which guarantees certain minimality properties of the
fixed point.
"""
class Length(Shrinker):
"""Attempts to find a smaller sequence satisfying f. Will only perform
linearly many evaluations, and does not loop to a fixed point.
Guarantees made at a fixed point:
1. No individual element may be deleted.
2. No *adjacent* pair of elements may be deleted.
"""
def make_immutable(self, value):
return tuple(value)
def short_circuit(self):
return self.consider(()) or len(self.current) <= 1
def left_is_better(self, left, right):
return len(left) < len(right)
def run_step(self):
# Try to delete as many elements as possible from the sequence, trying
# each element no more than once.
# We convert the sequence to a set of indices. This allows us to more
# easily do book-keeping around which elements we've tried removing.
initial = self.current
indices = list(hrange(len(self.current)))
# The set of indices that we have not yet removed (either because
# we have not yet tried to remove them or because we tried and
# failed).
current_subset = set(indices)
# The set of indices in current_subset that we have not yet tried
# to remove.
candidates_for_removal = set(current_subset)
def consider_set(keep):
"""Try replacing current_subset with current_subset & keep."""
keep = keep & current_subset
to_remove = current_subset - keep
# Once we've tried and failed to delete an element we never
# attempt to delete it again in the current pass. This can cause
# us to skip shrinks that would work, but that doesn't matter -
# if this pass succeeded then it will run again at some point,
# so those will be picked up later.
if not to_remove.issubset(candidates_for_removal):
return False
if self.consider([v for i, v in enumerate(initial) if i in keep]):
current_subset.intersection_update(keep)
return True
return False
# We iterate over the indices in random order. This is because deletions
# towards the end are more likely to work, while deletions from the
# beginning are more likely to have higher impact. In addition there
# tend to be large "dead" regions where nothing can be deleted, and
# by proceeding in random order we don't have long gaps in those where
# we make no progress.
#
# Note that this may be strictly more expensive than iterating from
# left to right or right to left. The cost of find_integer, say f, is
# convex. When deleting n elements starting from the left we pay f(n)
# invocations, but when starting from the middle we pay 2 f(n / 2)
# > f(n) invocations. In this case we are prioritising making progress
# over a possibly strictly lower cost for two reasons: Firstly, when
# n is small we just do linear scans anyway so this doesn't actually
# matter, and secondly because successfuly deletions will tend to
# speed up the test function and thus even when we make more test
# function calls we may still win on time.
#
# It's also very frustrating watching the shrinker repeatedly fail
# to delete, so there's a psychological benefit to prioritising
# progress over cost.
self.random.shuffle(indices)
for i in indices:
candidates_for_removal &= current_subset
if not candidates_for_removal:
break
# We have already processed this index, either because it was bulk
# removed or is the end point of a set that was.
if i not in candidates_for_removal:
continue
# Note that we do not update candidates_for_removal until we've
# actually tried removing them. This is because our consider_set
# predicate checks whether we've previously tried deleting them,
# so removing them here would result in wrong checks!
# We now try to delete a region around i. We start by trying to
# delete a region starting with i, i.e. [i, j) for some j > i.
to_right = find_integer(
lambda n: i + n <= len(initial)
and consider_set(current_subset - set(hrange(i, i + n)))
)
# If that succeeded we're in a deletable region. It's unlikely that
# we happened to pick the starting index of that region, so we try
# to extend it to the left too.
if to_right > 0:
to_left = find_integer(
lambda n: i - n >= 0
and consider_set(current_subset - set(hrange(i - n, i)))
)
# find_integer always tries at least n + 1 when it returns n.
# This means that we've tried deleting i - (to_left + 1) and
# failed to do so, so we can remove it from our candidates for
# deletion.
candidates_for_removal.discard(i - to_left - 1)
# We've now tried deleting i so remove it.
candidates_for_removal.discard(i)
# As per comment above we've also tried deleting one past the end
# of the region so we remove that from the candidate set too.
candidates_for_removal.discard(i + to_right)