Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

global: do not start two daemons with a single pid-file #7075

Merged
merged 2 commits into from Jan 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 13 additions & 4 deletions src/global/global_init.cc
Expand Up @@ -261,14 +261,19 @@ int global_init_prefork(CephContext *cct)
{
if (g_code_env != CODE_ENVIRONMENT_DAEMON)
return -1;

const md_config_t *conf = cct->_conf;
if (!conf->daemonize) {

if (pidfile_open(g_conf) < 0) {
exit(1);
}

if (atexit(pidfile_remove_void)) {
derr << "global_init_daemonize: failed to set pidfile_remove function "
<< "to run at exit." << dendl;
}

pidfile_write(g_conf);
pidfile_write();

return -1;
}
Expand All @@ -292,7 +297,7 @@ void global_init_daemonize(CephContext *cct)
<< cpp_strerror(ret) << dendl;
exit(1);
}

global_init_postfork_start(cct);
global_init_postfork_finish(cct);
#else
Expand All @@ -305,6 +310,10 @@ void global_init_postfork_start(CephContext *cct)
// restart log thread
g_ceph_context->_log->start();

if (pidfile_open(g_conf) < 0) {
exit(1);
}

if (atexit(pidfile_remove_void)) {
derr << "global_init_daemonize: failed to set pidfile_remove function "
<< "to run at exit." << dendl;
Expand Down Expand Up @@ -333,7 +342,7 @@ void global_init_postfork_start(CephContext *cct)
exit(1);
}

pidfile_write(g_conf);
pidfile_write();
}

void global_init_postfork_finish(CephContext *cct)
Expand Down
150 changes: 113 additions & 37 deletions src/global/pidfile.cc
Expand Up @@ -31,68 +31,144 @@

#define dout_prefix *_dout

static char pid_file[PATH_MAX] = "";
struct pidfh {
int pf_fd;
char pf_path[PATH_MAX + 1];
dev_t pf_dev;
ino_t pf_ino;

pidfh() : pf_fd(-1), pf_dev(0), pf_ino(0) {
memset(pf_path, 0, sizeof(pf_path));
}

void close() {
pf_fd = -1;
pf_path[0] = '\0';
pf_dev = 0;
pf_ino = 0;
}
};
static pidfh pfh;

int pidfile_write(const md_config_t *conf)
{
int ret, fd;
static int pidfile_verify() {
struct stat st;

if (conf->pid_file.empty()) {
return pidfile_remove();
if (pfh.pf_fd == -1) {
return -EINVAL;
}
/*
* Check remembered descriptor
*/
if (fstat(pfh.pf_fd, &st) == -1) {
return -errno;
}
snprintf(pid_file, PATH_MAX, "%s", conf->pid_file.c_str());

fd = TEMP_FAILURE_RETRY(::open(pid_file,
O_CREAT|O_TRUNC|O_WRONLY, 0644));
if (fd < 0) {
int err = errno;
derr << "write_pid_file: failed to open pid file '"
<< pid_file << "': " << cpp_strerror(err) << dendl;
return err;
if (st.st_dev != pfh.pf_dev || st.st_ino != pfh.pf_ino) {
return -ESTALE;
}
return 0;
}

int pidfile_write()
{
if (!pfh.pf_path[0]) {
return 0;
}

char buf[20];
int ret;
if ((ret = pidfile_verify()) < 0) {
return ret;
}

char buf[32];
int len = snprintf(buf, sizeof(buf), "%d\n", getpid());
ret = safe_write(fd, buf, len);
ret = safe_write(pfh.pf_fd, buf, len);
if (ret < 0) {
derr << "write_pid_file: failed to write to pid file '"
<< pid_file << "': " << cpp_strerror(ret) << dendl;
VOID_TEMP_FAILURE_RETRY(::close(fd));
<< pfh.pf_path << "': " << cpp_strerror(ret) << dendl;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential fd leak here.
Shall put an extra
VOID_TEMP_FAILURE_RETRY(::close(pfh.pf_fd));
here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xiexingguo all clean work will be done by pidfile_remove, which is called when daemon shutdown. so it seems not to cause fd leak, am i right?

pfh.close();
return ret;
}
if (TEMP_FAILURE_RETRY(::close(fd))) {
ret = errno;
derr << "SimpleMessenger::write_pid_file: failed to close to pid file '"
<< pid_file << "': " << cpp_strerror(ret) << dendl;
return -ret;
}

return 0;
}

int pidfile_remove(void)
int pidfile_remove()
{
if (!pid_file[0])
if (!pfh.pf_path[0]) {
return 0;
}

int ret;
if ((ret = pidfile_verify()) < 0) {
if (pfh.pf_fd != -1) {
::close(pfh.pf_fd);
}
return ret;
}

// only remove it if it has OUR pid in it!
int fd = TEMP_FAILURE_RETRY(::open(pid_file, O_RDONLY));
if (fd < 0)
return -errno;
char buf[32];
memset(buf, 0, sizeof(buf));
ssize_t res = safe_read(fd, buf, sizeof(buf));
VOID_TEMP_FAILURE_RETRY(::close(fd));
if (res < 0)
ssize_t res = safe_read(pfh.pf_fd, buf, sizeof(buf));
if (pfh.pf_fd != -1) {
::close(pfh.pf_fd);
}

if (res < 0) {
return res;
}

int a = atoi(buf);
if (a != getpid())
if (a != getpid()) {
return -EDOM;

res = ::unlink(pid_file);
if (res)
}
res = ::unlink(pfh.pf_path);
if (res) {
return res;
}
pfh.close();
return 0;
}

int pidfile_open(const md_config_t *conf)
{
if (conf->pid_file.empty()) {
return 0;
}
int len = snprintf(pfh.pf_path, sizeof(pfh.pf_path),
"%s", conf->pid_file.c_str());

if (len >= (int)sizeof(pfh.pf_path)) {
return -ENAMETOOLONG;
}

pid_file[0] = '\0';
int fd;
fd = ::open(pfh.pf_path, O_CREAT|O_WRONLY, 0644);
if (fd < 0) {
int err = errno;
derr << "write_pid_file: failed to open pid file '"
<< pfh.pf_path << "': " << cpp_strerror(err) << dendl;
pfh.close();
return -errno;
}
struct stat st;
if (fstat(fd, &st) == -1) {
close(fd);
pfh.close();
return -errno;
}
pfh.pf_fd = fd;
pfh.pf_dev = st.st_dev;
pfh.pf_ino = st.st_ino;

struct flock l = { F_WRLCK, SEEK_SET, 0, 0, 0 };
int r = ::fcntl(pfh.pf_fd, F_SETLK, &l);
if (r < 0) {
derr << "failed to lock pidfile - " << pfh.pf_path << ". is there another process in use?" << dendl;
close(pfh.pf_fd);
pfh.close();
return -errno;
}
return 0;
}
7 changes: 5 additions & 2 deletions src/global/pidfile.h
Expand Up @@ -19,10 +19,13 @@ struct md_config_t;

// Write a pidfile with the current pid, using the configuration in the
// provided conf structure.
int pidfile_write(const md_config_t *conf);
int pidfile_write();

// Remove the pid file that was previously written by pidfile_write.
// This is safe to call in a signal handler context.
int pidfile_remove(void);
int pidfile_remove();

// test whether the pid_file is being used by another process
int pidfile_open(const md_config_t *conf);

#endif
3 changes: 2 additions & 1 deletion src/test/Makefile.am
Expand Up @@ -89,7 +89,8 @@ check_SCRIPTS += \
test/osd/osd-markdown.sh \
test/mon/mon-handle-forward.sh \
test/libradosstriper/rados-striper.sh \
test/test_objectstore_memstore.sh
test/test_objectstore_memstore.sh \
test/test_pidfile.sh

check_SCRIPTS += test/ceph-disk.sh

Expand Down
76 changes: 76 additions & 0 deletions src/test/test_pidfile.sh
@@ -0,0 +1,76 @@
#!/bin/bash

#
# test pidfile here
#

# Includes
source ../qa/workunits/ceph-helpers.sh

function run() {
local dir=$1
shift

export CEPH_MON="127.0.0.1:17108"
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "

local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
$func $dir || return 1
done
}

function TEST_without_pidfile() {
local dir=$1
setup $dir
local RUNID=`uuidgen`
run_mon $dir a --pid-file= --daemonize=$RUNID || { teardown_unexist_pidfile $dir; return 1; }
run_osd $dir 0 --pid-file= --daemonize=$RUNID || { teardown_unexist_pidfile $dir; return 1; }
teardown_unexist_pidfile $dir $RUNID || return 1
}

function teardown_unexist_pidfile() {
local dir=$1
shift
local RUNID=$1
shift
local delays=${4:-0 0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120}
local pids=$(ps aux|awk "/cep[h].*$RUNID.*/ {print \$2}")
local status=0
for i in $pids ; do
local kill_complete=false
for try in $delays ; do
if kill $i 2> /dev/null ; then
kill_complete=false
sleep $try
else
kill_complete=true
break
fi
done
if ! $kill_complete ; then
status=1
fi
done
if [ $(stat -f -c '%T' .) == "btrfs" ]; then
__teardown_btrfs $dir
fi
rm -fr $dir
return $status
}

function TEST_contend_pidfile() {
local dir=$1
setup $dir

run_mon $dir a
run_mon $dir a 2>&1 | grep "failed to lock pidfile" || return 1

run_osd $dir 0
run_osd $dir 0 2>&1 | grep "failed to lock pidfile" || return 1
teardown $dir || return 1
}

main pidfile