Permalink
Browse files

lrmd: Have pacemaker-remote reap zombies if it is running as pid 1

  • Loading branch information...
beekhof committed Apr 19, 2017
1 parent 26c59fb commit 8abdd82ba85ee384ab78ce1db617f51b692e9df6
Showing with 131 additions and 1 deletion.
  1. +10 −0 configure.ac
  2. +121 −1 lrmd/main.c
View
@@ -823,6 +823,16 @@ if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
AC_MSG_ERROR(The libxslt developement headers were not found)
fi
AC_CACHE_CHECK(whether __progname and __progname_full are available,
pf_cv_var_progname,
AC_TRY_LINK([extern char *__progname, *__progname_full;],
[__progname = "foo"; __progname_full = "foo bar";],
pf_cv_var_progname="yes", pf_cv_var_progname="no"))
if test "$pf_cv_var_progname" = "yes"; then
AC_DEFINE(HAVE___PROGNAME,1,[ ])
fi
dnl ========================================================================
dnl Structures
dnl ========================================================================
View
@@ -21,6 +21,11 @@
#include <glib.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
@@ -391,6 +396,119 @@ void handle_shutdown_nack()
crm_debug("Ignoring unexpected shutdown nack");
}
static pid_t main_pid = 0;
static void
sigdone(void)
{
exit(0);
}
static void
sigreap(void)
{
pid_t pid = 0;
int status;
do {
/*
* Opinions seem to differ as to what to put here:
* -1, any child process
* 0, any child process whose process group ID is equal to that of the calling process
*/
pid = waitpid(-1, &status, WNOHANG);
if(pid == main_pid) {
/* Exit when pacemaker-remote exits and use the same return code */
if (WIFEXITED(status)) {
exit(WEXITSTATUS(status));
}
exit(1);
}
} while (pid > 0);
}
static struct {
int sig;
void (*handler)(void);
} sigmap[] = {
{ SIGCHLD, sigreap },
{ SIGINT, sigdone },
};
static void spawn_pidone(int argc, char **argv, char **envp)
{
sigset_t set;
if (getpid() != 1) {
return;
}
sigfillset(&set);
sigprocmask(SIG_BLOCK, &set, 0);
main_pid = fork();
switch (main_pid) {
case 0:
sigprocmask(SIG_UNBLOCK, &set, NULL);
setsid();
setpgid(0, 0);
/* Child remains as pacemaker_remoted */
return;
case -1:
perror("fork");
}
/* Parent becomes the reaper of zombie processes */
/* Safe to initialize logging now if needed */
#ifdef HAVE___PROGNAME
/* Differentiate ourselves in the 'ps' output */
{

This comment has been minimized.

Show comment
Hide comment
@jnpkrn

jnpkrn May 2, 2017

Contributor

So the purpose of the conditional compilation is to only proceed if there's (nonportable) implicit backup of the original name? libqb does not use this backup AFAIK, not sure about the libc functions.

Does it make sense to spoof itself to out-of-cointainer world? I don't think in-container processes would care, and in many containers running scenario there will be just a bunch of pcmk-init processes (as opposed to pacemaker_remoted?). It might make sense to have a suffix denoting the container nesting level, supposing it will get somehow propagated as environment variables?

@jnpkrn

jnpkrn May 2, 2017

Contributor

So the purpose of the conditional compilation is to only proceed if there's (nonportable) implicit backup of the original name? libqb does not use this backup AFAIK, not sure about the libc functions.

Does it make sense to spoof itself to out-of-cointainer world? I don't think in-container processes would care, and in many containers running scenario there will be just a bunch of pcmk-init processes (as opposed to pacemaker_remoted?). It might make sense to have a suffix denoting the container nesting level, supposing it will get somehow propagated as environment variables?

This comment has been minimized.

Show comment
Hide comment
@kgaillot

kgaillot May 2, 2017

Collaborator

Containers have their own process space, so there is no relation to the outside world. If run as PID 1, pacemaker_remoted will now fork and run its usual main process inside the child, and its main process will exist only to reap orphaned processes (as any PID 1 is supposed to do). The name change makes this behavioral difference more obvious to any system administrator listing processes in the container.

I'm not familiar with how __progname is used.

@kgaillot

kgaillot May 2, 2017

Collaborator

Containers have their own process space, so there is no relation to the outside world. If run as PID 1, pacemaker_remoted will now fork and run its usual main process inside the child, and its main process will exist only to reap orphaned processes (as any PID 1 is supposed to do). The name change makes this behavioral difference more obvious to any system administrator listing processes in the container.

I'm not familiar with how __progname is used.

This comment has been minimized.

Show comment
Hide comment
@jnpkrn

jnpkrn May 3, 2017

Contributor

I am well aware of the primary PID 1 purpose :)

For listing processes in the container the PID is all you need
to tell the init process from others. Should the resource agents
running in such container need it to adapt their behaviour, they
can read /proc/1/exe link, for instance.

I think the bigger value would be for observing processes from
outside the container, in which case that suffix denoting depth
of nesting could be meaningful (e.g., in some complete stuck
scenario, start killing the most nested ones?).

@jnpkrn

jnpkrn May 3, 2017

Contributor

I am well aware of the primary PID 1 purpose :)

For listing processes in the container the PID is all you need
to tell the init process from others. Should the resource agents
running in such container need it to adapt their behaviour, they
can read /proc/1/exe link, for instance.

I think the bigger value would be for observing processes from
outside the container, in which case that suffix denoting depth
of nesting could be meaningful (e.g., in some complete stuck
scenario, start killing the most nested ones?).

char *p;
int i, maxlen;
char *LastArgv = NULL;
const char *name = "pcmk-init";
for(i = 0; i < argc; i++) {
if(!i || (LastArgv + 1 == argv[i]))
LastArgv = argv[i] + strlen(argv[i]);
}
for(i = 0; envp[i] != NULL; i++) {
if((LastArgv + 1) == envp[i]) {
LastArgv = envp[i] + strlen(envp[i]);
}
}
maxlen = (LastArgv - argv[0]) - 2;
i = strlen(name);
/* We can overwrite individual argv[] arguments */
snprintf(argv[0], maxlen, "%s", name);
/* Now zero out everything else */
p = &argv[0][i];
while(p < LastArgv)
*p++ = '\0';
argv[1] = NULL;
}
#endif /* HAVE___PROGNAME */
while (1) {
int sig;
size_t i;
sigwait(&set, &sig);
for (i = 0; i < DIMOF(sigmap); i++) {
if (sigmap[i].sig == sig) {
sigmap[i].handler();
break;
}
}
}
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
@@ -410,12 +528,14 @@ static struct crm_option long_options[] = {
/* *INDENT-ON* */
int
main(int argc, char **argv)
main(int argc, char **argv, char **envp)
{
int flag = 0;
int index = 0;
const char *option = NULL;
/* If necessary, create PID1 now before any FDs are opened */
spawn_pidone(argc, argv, envp);
#ifndef ENABLE_PCMK_REMOTE
crm_log_preinit("lrmd", argc, argv);

0 comments on commit 8abdd82

Please sign in to comment.