Skip to content

Commit

Permalink
Replace --flush-on-signal <int> with --forward-signal <int> in ti…
Browse files Browse the repository at this point in the history
…mem (#230)

* Replace `--flush-on-signal <int>` with `--forward-signal <int>` in timem.

- on Linux, components such as `cpu_util` and `peak_rss` depend on the
  underlying process being waited on.
- `--flush-on-signal <int>` may result in zombie processes if timem gets
  the signal but its process group does not.

* Update outdated user messages

Signed-off-by: Michel Hidalgo <michel@ekumenlabs.com>

* Formatting
  • Loading branch information
hidmic committed Oct 4, 2021
1 parent 1a62106 commit 4ca991b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 55 deletions.
68 changes: 24 additions & 44 deletions source/tools/timemory-timem/timem.cpp
Expand Up @@ -34,7 +34,7 @@ std::exception_ptr global_exception_pointer = nullptr;
}

void
flush_on_signal(int);
forward_signal(int);
void
handle_exception();
void
Expand Down Expand Up @@ -245,16 +245,17 @@ main(int argc, char** argv)
p.get<bool>("network-stats"));
});
parser.add_argument()
.names({ "-F", "--flush-on-signal" })
.names({ "-F", "--forward-signal" })
.description(
"If any of these signals are sent to timem process, flush output and "
"exit.\n%{INDENT}%E.g. '--flush-on-signal 2' (default behavior) will cause "
"timem to dump it's output and exit if SIGINT (Cntl+C)\n%{INDENT}%is sent by "
"the user. Use '--flush-on-signal 0' to disable this behavior.")
"If any of these signals are sent to timem, forward them to the process.\n"
"%{INDENT}% E.g. '--forward-signal 2' (default behavior) will cause timem "
"to forward\n%{INDENT}% SIGINT to the process if (Cntl+C) is sent by the "
"user.\n"
"%{INDENT}% Use '--forward-signal 0' to disable this behavior.")
.dtype("int")
.min_count(1)
.action([&](parser_t& p) {
signal_flush() = p.get<std::set<int>>("flush-on-signal");
signal_forward() = p.get<std::set<int>>("forward-signal");
});
#if defined(TIMEMORY_USE_MPI)
parser
Expand Down Expand Up @@ -383,18 +384,18 @@ main(int argc, char** argv)
if(!use_sample())
signal_types().clear();

for(auto itr : signal_flush())
for(auto itr : signal_forward())
{
CONDITIONAL_PRINT_HERE((debug() && verbose() > 0),
"timem will stop, dump it's output, and exit if signal %i "
"is sent to this process (PID: %i)",
"timem will forward signal %i to its worker "
"process if it is sent to this process (PID: %i)",
itr, (int) tim::process::get_id());
if(signal_types().count(itr) > 0)
throw std::runtime_error(TIMEMORY_JOIN(
" ", "Error! timem sampler is using signal", itr,
"to handle the sampling measurements. Flushing output on this signal "
"will cause immediate termination. Re-run timem with the"
"'--disable-sampling' option to flush output on this signal"));
throw std::runtime_error(
TIMEMORY_JOIN(" ", "Error! timem sampler is using signal", itr,
"to handle the sampling measurements. Cannot forward it. "
"Re-run timem with the '--disable-sampling' option to "
"forward this signal"));
}

// set the signal handler on this process if using mpi so that we can read
Expand All @@ -405,13 +406,13 @@ main(int argc, char** argv)
create_signal_handler(TIMEM_PID_SIGNAL, get_signal_handler(TIMEM_PID_SIGNAL),
&childpid_catcher);
// allocate the signal handlers in map
for(auto itr : signal_flush())
for(auto itr : signal_forward())
(void) get_signal_handler(itr);
}
else
{
for(auto itr : signal_flush())
create_signal_handler(itr, get_signal_handler(itr), &flush_on_signal);
for(auto itr : signal_forward())
create_signal_handler(itr, get_signal_handler(itr), &forward_signal);
}

for(int i = 0; i < _argc; ++i)
Expand Down Expand Up @@ -716,9 +717,9 @@ childpid_catcher(int sig)
{
signal_delivered() = true;
restore_signal_handler(sig, get_signal_handler(sig));
// generate the signal handlers for flushing output
for(auto itr : signal_flush())
create_signal_handler(itr, get_signal_handler(itr), &flush_on_signal);
// generate the signal handlers for signal forwarding
for(auto itr : signal_forward())
create_signal_handler(itr, get_signal_handler(itr), &forward_signal);
int _worker = read_pid(master_pid());
worker_pid() = _worker;
tim::process::get_target_id() = _worker;
Expand All @@ -733,30 +734,9 @@ childpid_catcher(int sig)
//--------------------------------------------------------------------------------------//

void
flush_on_signal(int sig)
forward_signal(int sig)
{
if((debug() && verbose() > 1) || verbose() > 2)
std::cerr << "[BEFORE STOP][" << worker_pid() << "]> " << *get_measure()
<< std::endl;

CONDITIONAL_PRINT_HERE((debug() && verbose() > 1), "%s", "stopping sampler");
get_sampler()->stop();

CONDITIONAL_PRINT_HERE((debug() && verbose() > 1), "%s", "ignoring signals");
sampler_t::ignore(signal_types());

CONDITIONAL_PRINT_HERE((debug() && verbose() > 1), "%s", "barrier");
tim::mpi::barrier(tim::mpi::comm_world_v);

CONDITIONAL_PRINT_HERE((debug() && verbose() > 1), "%s", "processing");
parent_process(worker_pid());

CONDITIONAL_PRINT_HERE((debug() && verbose() > 1), "%s", "barrier");
tim::mpi::barrier(tim::mpi::comm_world_v);

CONDITIONAL_PRINT_HERE((debug() && verbose() > 1), "exit code = %i", sig);

std::exit(sig);
kill(worker_pid(), sig);
}

//--------------------------------------------------------------------------------------//
Expand Down
22 changes: 11 additions & 11 deletions source/tools/timemory-timem/timem.hpp
Expand Up @@ -882,16 +882,16 @@ struct timem_config
int verbose = tim::get_env("TIMEM_VERBOSE", 0);
string_t shell =
tim::get_env("TIMEM_SHELL", tim::get_env<string_t>("SHELL", getusershell()));
string_t shell_flags = tim::get_env<string_t>("TIMEM_SHELL_FLAGS", "");
string_t output_file = tim::get_env<string_t>("TIMEM_OUTPUT", "");
double sample_freq = tim::get_env<double>("TIMEM_SAMPLE_FREQ", 5.0);
double sample_delay = tim::get_env<double>("TIMEM_SAMPLE_DELAY", 1.0e-6);
pid_t master_pid = getpid();
pid_t worker_pid = getpid();
size_t buffer_size = 0;
string_t command = {};
std::set<int> signal_types = { SIGALRM };
std::set<int> signal_flush = { SIGINT };
string_t shell_flags = tim::get_env<string_t>("TIMEM_SHELL_FLAGS", "");
string_t output_file = tim::get_env<string_t>("TIMEM_OUTPUT", "");
double sample_freq = tim::get_env<double>("TIMEM_SAMPLE_FREQ", 5.0);
double sample_delay = tim::get_env<double>("TIMEM_SAMPLE_DELAY", 1.0e-6);
pid_t master_pid = getpid();
pid_t worker_pid = getpid();
size_t buffer_size = 0;
string_t command = {};
std::set<int> signal_types = { SIGALRM };
std::set<int> signal_forward = { SIGINT };
std::vector<std::string> argvector = {};
std::vector<hist_type> history = {};
std::unique_ptr<std::thread> buffer_thread = {};
Expand Down Expand Up @@ -938,7 +938,7 @@ TIMEM_CONFIG_FUNCTION(buffer_size)
TIMEM_CONFIG_FUNCTION(master_pid)
TIMEM_CONFIG_FUNCTION(worker_pid)
TIMEM_CONFIG_FUNCTION(signal_types)
TIMEM_CONFIG_FUNCTION(signal_flush)
TIMEM_CONFIG_FUNCTION(signal_forward)
TIMEM_CONFIG_FUNCTION(argvector)
TIMEM_CONFIG_FUNCTION(buffer_cv);
TIMEM_CONFIG_FUNCTION(buffer_thread);
Expand Down

0 comments on commit 4ca991b

Please sign in to comment.