Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jewel: ceph-disk: workaround gperftool hang #9427

Merged
merged 1 commit into from Jun 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 32 additions & 2 deletions src/ceph-disk/ceph_disk/main.py
Expand Up @@ -2642,6 +2642,36 @@ def populate_data_path(self, path, *to_prepare_list):
write_one_line(path, 'type', 'bluestore')


#
# Temporary workaround: if ceph-osd --mkfs does not
# complete within 5 minutes, assume it is blocked
# because of http://tracker.ceph.com/issues/13522
# and retry a few times.
#
# Remove this function and replace its calls with command_check_call
# when http://tracker.ceph.com/issues/13522 is fixed
#
def ceph_osd_mkfs(arguments):
    """Run a ``ceph-osd --mkfs`` style command under ``timeout``, retrying.

    The command given in *arguments* (a list of strings) is wrapped with
    the ``timeout`` executable. One attempt is made per whitespace
    separated value found in the ``CEPH_OSD_MKFS_DELAYS`` environment
    variable (default: ``'300 300 300 300 300'``); each value is passed
    to ``timeout`` as the delay for that attempt.

    Returns None as soon as one attempt succeeds.

    Raises Error when an attempt fails with an exit status other than
    124, or when every attempt timed out. The message carries the output
    of the last failed attempt.
    """
    timeout = _get_command_executable(['timeout'])
    delays = os.environ.get('CEPH_OSD_MKFS_DELAYS',
                            '300 300 300 300 300').split()
    error = 'unknown error'
    for delay in delays:
        try:
            _check_output(timeout + [delay] + arguments)
        except subprocess.CalledProcessError as e:
            error = e.output
            if e.returncode != 124:
                # a genuine failure: retrying would not help
                break
            # exit status 124: timeout fired, try the next delay
            LOG.debug('%s timed out : %s (retry)'
                      % (str(arguments), error))
        else:
            # the command completed, nothing more to do
            return
    raise Error('%s failed : %s' % (str(arguments), error))


def mkfs(
path,
cluster,
Expand All @@ -2663,7 +2693,7 @@ def mkfs(
osd_type = read_one_line(path, 'type')

if osd_type == 'bluestore':
command_check_call(
ceph_osd_mkfs(
[
'ceph-osd',
'--cluster', cluster,
Expand All @@ -2679,7 +2709,7 @@ def mkfs(
],
)
else:
command_check_call(
ceph_osd_mkfs(
[
'ceph-osd',
'--cluster', cluster,
Expand Down
30 changes: 28 additions & 2 deletions src/ceph-disk/tests/ceph-disk.sh
@@ -1,7 +1,7 @@
#!/bin/bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
# Copyright (C) 2014, 2015, 2016 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
Expand Down Expand Up @@ -153,7 +153,7 @@ function tweak_path() {
command_fixture ceph-conf || return 1
command_fixture ceph-osd || return 1

test_activate_dir
test_activate_dir || return 1

[ ! -f $DIR/used-ceph-conf ] || return 1
[ ! -f $DIR/used-ceph-osd ] || return 1
Expand Down Expand Up @@ -357,6 +357,31 @@ function test_keyring_path() {
grep --quiet "keyring $DIR/bootstrap-osd/ceph.keyring" $DIR/test_keyring || return 1
}

# http://tracker.ceph.com/issues/13522
function ceph_osd_fail_once_fixture() {
    # Install a fake ceph-osd in $DIR that sleeps for 600 seconds the
    # first time it is called with --mkfs (recording the call by
    # creating $DIR/used-ceph-osd) and otherwise execs the real
    # ../ceph-osd with the same arguments.
    #
    # Returns 1 if the ceph-osd currently found in the PATH is neither
    # ../ceph-osd nor $(pwd)/ceph-osd.
    local command=ceph-osd
    local fpath=$(readlink -f $(which $command))
    if ! { [ "$fpath" = $(readlink -f ../$command) ] || [ "$fpath" = $(readlink -f $(pwd)/$command) ] ; } ; then
        return 1
    fi

    cat > $DIR/$command <<EOF
#!/bin/bash
if echo "\$@" | grep -e --mkfs && ! test -f $DIR/used-$command ; then
touch $DIR/used-$command
# sleep longer than the first CEPH_OSD_MKFS_DELAYS value (5) below
sleep 600
else
exec ../$command "\$@"
fi
EOF
    chmod +x $DIR/$command
}

function test_ceph_osd_mkfs() {
    # Check that activation goes through when the first ceph-osd --mkfs
    # attempt outlives its timeout: the fixture stalls the first call,
    # and the 5 second first delay makes the stalled attempt time out.
    if ! ceph_osd_fail_once_fixture ; then
        return 1
    fi
    if ! CEPH_OSD_MKFS_DELAYS='5 300 300' use_path test_activate_dir ; then
        return 1
    fi
    # the marker file shows that the fake ceph-osd was actually used
    test -f $DIR/used-ceph-osd || return 1
}

function run() {
local default_actions
default_actions+="test_path "
Expand All @@ -369,6 +394,7 @@ function run() {
default_actions+="test_mark_init "
default_actions+="test_zap "
default_actions+="test_activate_dir_bluestore "
default_actions+="test_ceph_osd_mkfs "
local actions=${@:-$default_actions}
local status
for action in $actions ; do
Expand Down