-
Notifications
You must be signed in to change notification settings - Fork 1
/
teststate.jl
342 lines (291 loc) · 11.7 KB
/
teststate.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""
err_choices
Return the choices that led to the recorded error, if any.
If none, return `Nothing`.
"""
err_choices(ts::TestState) = if !isnothing(ts.target_err)
Some(last(@something ts.target_err))
else
nothing
end
"""
CURRENT_TESTCASE
A `ScopedValue` containing the currently active test case. Intended for use in user-facing
functions like `target!` or `assume!` that need access to the current testcase, but shouldn't
require it as an argument to make the API more user friendly.
Not intended for user-side access, thus considered internal and not supported under semver.
"""
const CURRENT_TESTCASE = ScopedValue{TestCase}()
"""
test_function(ts::TestState, tc::TestCase)
Test the function given to `ts` on the test case `tc`.
Returns a `NTuple{Bool, 2}` indicating whether `tc` is interesting and whether it is
"better" than the previously best recorded example in `ts`.
"""
function test_function(ts::TestState, tc::TestCase)
# one call == one test of the function in `ts` for the given `TestCase`
ts.calls += 1
interesting, threw = try
@with CURRENT_TESTCASE => tc begin
ts.is_interesting(tc)
end, nothing
catch e
# Interrupts are an abort signal, so rethrow
e isa InterruptException && rethrow()
# UndefVarError are a programmer error, so rethrow
e isa UndefVarError && rethrow()
# These are wanted rejections
e isa TestException && return (false, false)
# true errors are always interesting
true, Some((e, stacktrace(catch_backtrace())))
end
!(interesting isa Bool) && return (false, false)
if !interesting
ts.test_is_trivial = isempty(tc.attempt.choices)
ts.valid_test_cases += 1
# check for target improvement
was_better = false
if !isnothing(tc.targeting_score)
score = @something tc.targeting_score
old_score, _ = @something ts.best_scoring Some((typemin(score),nothing))
if old_score < score
ts.best_scoring = Some((score, copy(tc.attempt)))
was_better = true
end
end
return (false, was_better)
else
ts.test_is_trivial = isempty(tc.attempt.choices)
ts.valid_test_cases += 1
# Check for interestingness
was_more_interesting = false
if isnothing(threw) && (isnothing(ts.result) ||
length((@something ts.result).choices) > length(tc.attempt.choices) ||
(@something(ts.result).choices) > tc.attempt.choices)
ts.result = Some(copy(tc.attempt))
was_more_interesting = true
end
# check for target improvement
was_better = false
if !isnothing(tc.targeting_score)
score = @something tc.targeting_score
old_score, attempt = @something ts.best_scoring Some((typemin(score), (;choices=UInt[])))
if old_score < score || (old_score == score && attempt.choices > tc.attempt.choices)
ts.best_scoring = Some((score, copy(tc.attempt)))
was_better = true
end
end
if isnothing(ts.target_err)
# we haven't had an error so far, but have we hit one now?
if !isnothing(threw)
err, trace = @something threw
len = find_user_stack_depth(trace)
ts.target_err = Some((err, trace, len, copy(tc.attempt)))
return (true, true)
end
else # we already had an error - did we hit the same one?
# we didn't throw, so this is strictly less interesting
isnothing(threw) && return (false, false)
err, trace = @something threw
old_err, old_trace, old_len, old_attempt = @something ts.target_err
old_frame = find_user_error_frame(old_err, old_trace)
frame = find_user_error_frame(err, trace)
# if the error isn't the same, it can't possibly be better
if !(typeof(err) == typeof(old_err) && frame == old_frame)
cache_entry = (typeof(err), frame)
if !(cache_entry in ts.error_cache)
@warn "Encountered an error, but it was different from the previously seen one - Ignoring!" Error=err Location=frame
push!(ts.error_cache, cache_entry)
end
return (false, false)
end
was_more_interesting = true
len = find_user_stack_depth(trace)
was_better |= len < old_len || (len == old_len && tc.attempt.choices < old_attempt.choices) || (applicable(err_less, err, old_err) && err_less(err, old_err))
if was_better
ts.target_err = Some((err, trace, len, copy(tc.attempt)))
end
end
return (was_more_interesting, was_better)
end
end
"""
find_user_error_frame(err, trace)
Try to heuristically guess where an error was actually coming from.
For example, `ErrorException` is (generally) thrown from the `error`
function, which would always report the same location if we'd naively
take the first frame of the trace. This tries to be a bit smarter (but
still fairly conservative) and return something other than the first
frame for a small number of known error-throwing functions.
"""
function find_user_error_frame end
find_user_error_frame(err, trace::Vector{StackFrame}) = first(trace)
function find_user_error_frame(err::ErrorException, trace::Vector{StackFrame})
top_frame = first(trace)
error_func_methods = methods(error)
err_exc_lines = ( getproperty(em, :line) for em in error_func_methods )
len_ok = length(trace) >= 2
frame_is_error = top_frame.func == :error
line_is_error = top_frame.line in err_exc_lines
# we can special case the file here since this is only one error
# NOTE: This can break in future versions, if the error function moves!
file_is_error = top_frame.file === Symbol("./error.jl")
if (len_ok & frame_is_error & line_is_error & file_is_error)
# if there are more frames in the trace AND the top frame is just the `error` function,
# return the frame that called `error` instead of `error` itself
return @inbounds trace[2]
else
return top_frame
end
end
"""
find_user_stack_depth
Return a heuristic guess for how many functions deep in user code an error was thrown.
Falls back to the full length of the stacktrace.
"""
function find_user_stack_depth(trace)::Int
res = findfirst(trace) do sf
sf.func == :test_function && sf.file == Symbol(@__FILE__)
end
@something res Some(length(trace))
end
"""
run(ts::TestState)
Run the checking algorithm on `ts`, generating values until we should stop, targeting
the score we want to target on and finally shrinking the result.
"""
function run(ts::TestState)
@debug "Starting generating values" Test=ts.is_interesting
generate!(ts)
@debug "Improving targeted example"
target!(ts)
@debug "Shrinking example"
shrink!(ts)
@debug "Done!"
nothing
end
"""
should_keep_generating(ts::TestState)
Whether `ts` should keep generating new test cases, or whether `ts` is finished.
`true` returned here means that the given property is not trivial, there is no result yet
and we have room for more examples.
"""
function should_keep_generating(ts::TestState)
triv = ts.test_is_trivial
# Either we find a regular counterexample, or we error
# both mean we can stop looking, and start shrinking
no_result = isnothing(ts.result) & isnothing(ts.target_err)
more_examples = ts.valid_test_cases < ts.config.max_examples
# this 10x ensures that we can make many more calls than
# we need to fill valid test cases, especially when targeting
more_calls = ts.calls < (10*ts.config.max_examples)
ret = !triv & no_result & more_examples & more_calls
return ret
end
"""
adjust(ts::TestState, attempt)
Adjust `ts` by testing for the choices given by `attempt`.
Returns whether `attempt` was by some measure better than the previously
best attempt.
"""
function adjust(ts::TestState, attempt::Attempt)
result = test_function(ts, for_choices(attempt.choices, copy(ts.rng), attempt.generation, attempt.max_generation))
last(result)
end
"""
target!(ts::TestState)
If `ts` has a target to go towards set, this will try to climb towards that target
by adjusting the choice sequence until `ts` shouldn't generate anymore.
If `ts` is currently tracking an error it encountered, it will try to minimize the
stacktrace there instead.
"""
function target!(ts::TestState)
!(isnothing(ts.result) && isnothing(ts.target_err)) && return
isnothing(ts.best_scoring) && return
@debug "Targeting" Score=!isnothing(ts.best_scoring) Err=!isnothing(ts.target_err)
while should_keep_generating(ts)
# It may happen that choices is all zeroes, and that targeting upwards
# doesn't do anything. In this case, the loop will run until max_examples
# is exhausted.
# can we climb up?
new = copy(last(@something ts.target_err ts.best_scoring))
i = rand(ts.rng, 1:length(new.choices))
new.choices[i] += 1
if adjust(ts, new)
k = 1
new.choices[i] += k
while should_keep_generating(ts) && adjust(ts, new)
k *= 2
new.choices[i] += k
end
while k > 0
while should_keep_generating(ts) && adjust(ts, new)
new.choices[i] += k
end
k ÷= 2
end
end
# Or should we climb down?
new = copy(last(@something ts.target_err ts.best_scoring))
if new.choices[i] < 1
continue
end
new.choices[i] -= 1
if adjust(ts, new)
k = 1
if new.choices[i] < k
continue
end
new.choices[i] -= k
while should_keep_generating(ts) && adjust(ts, new)
if new.choices[i] < k
break
end
new.choices[i] -= k
k *= 2
end
while k > 0
while should_keep_generating(ts) && adjust(ts, new)
if new.choices[i] < k
break
end
new.choices[i] -= k
end
k ÷= 2
end
end
end
end
"""
generate(ts::TestState)
Try to generate an example that falsifies the property given to `ts`.
"""
function generate!(ts::TestState)
# 1) try to reproduce a previous failure
if !isnothing(ts.previous_example)
attempt = @something ts.previous_example
# FIXME: the RNG should be stored too!
tc = for_choices(attempt.choices, ts.rng, attempt.generation, attempt.max_generation)
test_function(ts, tc)
end
# 2) try to generate new counterexamples
while should_keep_generating(ts) & # no result
(isnothing(ts.best_scoring) || # no score
(ts.valid_test_cases <= ts.config.max_examples÷2))
# +1, since we this test case is for the *next* call
tc = TestCase(UInt64[], ts.rng, ts.calls+1, ts.config.max_examples, ts.config.buffer_size*8)
test_function(ts, tc)
end
end
"""
consider(ts::TestState, attempt::Attempt) -> Bool
Returns whether the given choices are a conceivable example for the testcase given by `ts`.
"""
function consider(ts::TestState, attempt::Attempt)::Bool
compare = @something(ts.result, Some((;choices=nothing))).choices
if attempt.choices == compare
true
else
first(test_function(ts, for_choices(attempt.choices, copy(ts.rng), attempt.generation, attempt.max_generation)))
end
end