Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests: fix ceph-helpers.sh wait_for_clean delays #12095

Merged
merged 1 commit into from Nov 21, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
58 changes: 41 additions & 17 deletions qa/workunits/ceph-helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,41 @@ function test_is_clean() {

#######################################################################

##
# Return a list of numbers that are increasingly larger and whose
# total is **timeout** seconds. It can be used to have short sleep
# delay while waiting for an event on a fast machine. But if running
# very slowly the larger delays avoid stressing the machine even
# further or spamming the logs.
#
# @param timeout sum of all delays, in seconds
# @return a list of sleep delays
#
function get_timeout_delays() {
local -i timeout=$1
local -i first_step=1

local -i i
local -i total=0
for (( i = $first_step ; $total + $i <= $timeout ; i *= 2 )) ; do
echo -n "$i "
total+=$i
done
if (( $total < $timeout )) ; then
echo -n $(( $timeout - $total ))
fi
}

function test_get_timeout_delays() {
test "$(get_timeout_delays 1)" = "1 " || return 1
test "$(get_timeout_delays 5)" = "1 2 2" || return 1
test "$(get_timeout_delays 6)" = "1 2 3" || return 1
test "$(get_timeout_delays 7)" = "1 2 4 " || return 1
test "$(get_timeout_delays 8)" = "1 2 4 1" || return 1
}

#######################################################################

##
# Wait until the cluster becomes clean or if it does not make progress
# for $TIMEOUT seconds.
Expand All @@ -1080,18 +1115,12 @@ function test_is_clean() {
# @return 0 if the cluster is clean, 1 otherwise
#
function wait_for_clean() {
local status=1
local num_active_clean=-1
local cur_active_clean
local -a delays=($(get_timeout_delays $TIMEOUT))
local -i timer=0
local -i loops=0
local -i phase=0
local num_pgs=$(get_num_pgs)
test $num_pgs != 0 || return 1
local TENTH_TIMEOUT=($TIMEOUT * 10)
# The first phase 10 times (1 second) second phase an additional 15 times (15 seconds)
local backoff_phases=( 10 15 -1 )
local sleep_backoff=( .1 1 10)

while true ; do
# Comparing get_num_active_clean & get_num_pgs is used to determine
Expand All @@ -1100,21 +1129,16 @@ function wait_for_clean() {
cur_active_clean=$(get_num_active_clean)
test $cur_active_clean = $num_pgs && break
if test $cur_active_clean != $num_active_clean ; then
timer=0 ; loops=0 ; phase=0
timer=0
num_active_clean=$cur_active_clean
elif get_is_making_recovery_progress ; then
timer=0 ; loops=0 ; phase=0
elif (( timer >= $TENTH_TIMEOUT)) ; then
timer=0
elif (( $timer >= ${#delays[*]} )) ; then
ceph report
return 1
fi
sleep ${sleep_backoff[$phase]}
timer="$(echo $timer + ${sleep_backoff[$phase]} \* 10 | bc | cut -d. -f1)"
loops=$(expr $loops + 1)
if (( $loops == ${backoff_phases[$phase]} )); then
phase=$(expr $phase + 1)
loop=0
fi
sleep ${delays[$timer]}
timer+=1
done
return 0
}
Expand Down