Skip to content

Commit

Permalink
i#1921 native sig: Handle signals during DR initialization (#4662)
Browse files Browse the repository at this point in the history
Puts in place 6 fixes for handling signals during DR initialization,
typically in a start/stop setup where other threads are alive.

1) Copy the app handler at init time for delivering native signals
during init.

2) Reorder signal_arch_init(), which obtains the signal frame size, to
run before DR installs its handler.

3) Obtains the app handler before installing DR's handler, eliminating
a (narrow) race window.

4) Until DR starts executing the app, continues delivering native
signals and using the globally recorded app handler, to match how DR
init works.

5) Set detacher_tid between init and setup to avoid races like in
DR's handler at the end of init that were under #3535

6) Handle a race where the init thread has set the global try_except,
causing master_signal_handler_C to think an app thread's signal is
DR's.  We add a global_try_tid and check the thread id to solve this.

Augments api.detach_signal with signals sent during init.
This causes the test to hit a DR bug in the vsyscall hook (#4664) on
32-bit Linux.  For now we disable the test for 32-bit.

Also tested on a large proprietary application.

Issue: #1921, #3535, #4664
  • Loading branch information
derekbruening committed Jan 9, 2021
1 parent c87bc7c commit 568d7a6
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 26 deletions.
12 changes: 10 additions & 2 deletions core/dynamo.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,9 +634,15 @@ dynamorio_app_init(void)

/* We can't clear this on detach like other vars b/c we need native threads
* to continue to avoid safe_read_tls_magic() in is_thread_tls_initialized().
* So we clear it on (re-)init.
* So we clear it on (re-)init in dynamorio_take_over_threads().
* From now until then, we avoid races where another thread invokes a
* safe_read during native signal delivery but we remove DR's handler before
* it reaches there and it is delivered to the app's handler instead, kind
* of like i#3535, by re-using the i#3535 mechanism of pointing at the only
* thread who could possibly have a dcontext.
* XXX: Should we rename this s/detacher_/singleton_/ or something?
*/
detacher_tid = INVALID_THREAD_ID;
detacher_tid = IF_UNIX_ELSE(get_sys_thread_id(), INVALID_THREAD_ID);
/* thread-specific initialization for the first thread we inject in
* (in a race with injected threads, sometimes it is not the primary thread)
*/
Expand Down Expand Up @@ -2936,6 +2942,8 @@ dynamorio_take_over_threads(dcontext_t *dcontext)
signal_event(dr_app_started);
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
dynamo_started = true;
/* Similarly, with our signal handler back in place, we remove the TLS limit. */
detacher_tid = INVALID_THREAD_ID;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
/* XXX i#1305: we should suspend all the other threads for DR init to
* satisfy the parts of the init process that assume there are no races.
Expand Down
5 changes: 4 additions & 1 deletion core/globals.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2020 Google, Inc. All rights reserved.
* Copyright (c) 2011-2021 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -807,6 +807,9 @@ typedef struct _try_except_t {
} try_except_t;

extern try_except_t global_try_except;
#ifdef UNIX
extern thread_id_t global_try_tid;
#endif

typedef struct {
/* WARNING: if you change the offsets of any of these fields,
Expand Down
69 changes: 56 additions & 13 deletions core/unix/signal.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2020 Google, Inc. All rights reserved.
* Copyright (c) 2011-2021 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -516,8 +516,13 @@ d_r_signal_init(void)
IF_MACOS(ASSERT(sizeof(kernel_sigset_t) == sizeof(__darwin_sigset_t)));
os_itimers_thread_shared();

IF_LINUX(signalfd_init());
signal_arch_init();

/* Set up a handler for safe_read (or other fault detection) during
* DR init before thread is initialized.
* DR init before thread is initialized. We must do this *after* signal_arch_init()
* and other key init in case a native signal arrives right after we install
* our handler (i#1921).
*
* XXX: could set up a clone_record_t and pass to the initial
* signal_thread_inherit() but that would require further code changes.
Expand All @@ -532,9 +537,6 @@ d_r_signal_init(void)
kernel_sigaddset(&set, SIGSEGV);
kernel_sigaddset(&set, SIGBUS);
sigprocmask_syscall(SIG_UNBLOCK, &set, &init_sigmask, sizeof(set));

IF_LINUX(signalfd_init());
signal_arch_init();
}

void
Expand Down Expand Up @@ -1087,7 +1089,14 @@ signal_thread_inherit(dcontext_t *dcontext, void *clone_record)
info->we_intercept[i] = true;
}
} else {
/* we intercept the following signals ourselves: */
/* We intercept the following signals ourselves. If this is the first
* thread, currently doing DR init, the app handlers for these are in
* init_info. We do not copy those handlers into info at this time as we're
* not equipped yet to deliver signals. We wait for DR to start running the
* app (init can be separated from start for dr_app_* interfaces), when
* signal_reinstate_handlers() will record the app handlers. Signals
* received in the meantime will go through execute_native_handler().
*/
info->we_intercept[SIGSEGV] = true;
/* PR 313665: look for DR crashes on unaligned memory or mmap bounds */
info->we_intercept[SIGBUS] = true;
Expand Down Expand Up @@ -1494,8 +1503,11 @@ set_handler_and_record_app(dcontext_t *dcontext, thread_sig_info_t *info, int si
kernel_sigaction_t oldact;
ASSERT(sig <= MAX_SIGNUM);

/* arm the signal */
rc = sigaction_syscall(sig, act, &oldact);
/* Get the app's handler first, to handle a race where a signal arrives in
* between. That trumps a race where the app changes its handler which is much
* less likely.
*/
rc = sigaction_syscall(sig, NULL, &oldact);
ASSERT(rc ==
0
/* Workaround for PR 223720, which was fixed in ESX4.0 but
Expand Down Expand Up @@ -1536,6 +1548,13 @@ set_handler_and_record_app(dcontext_t *dcontext, thread_sig_info_t *info, int si
oldact.handler, oldact.flags, sig);
}
#endif
/* Record the app handler for delivering native signals during initialization.
* XXX: Should we rename this s/detached_/native_/ or something?
*/
d_r_write_lock(&detached_sigact_lock);
memcpy(&detached_sigact[sig], info->app_sigaction[sig],
sizeof(detached_sigact[sig]));
d_r_write_unlock(&detached_sigact_lock);
} else {
LOG(THREAD, LOG_ASYNCH, 2,
"prior handler is " PFX " vs master " PFX " with flags=0x%x for signal %d\n",
Expand All @@ -1549,6 +1568,18 @@ set_handler_and_record_app(dcontext_t *dcontext, thread_sig_info_t *info, int si
d_r_mutex_unlock(info->shared_lock);
}
}

/* Arm the signal. */
rc = sigaction_syscall(sig, act, NULL);
ASSERT(rc ==
0
/* Workaround for PR 223720, which was fixed in ESX4.0 but
* is present in ESX3.5 and earlier: vmkernel treats
* 63 and 64 as invalid signal numbers.
*/
IF_VMX86(|| (sig >= 63 && rc == -EINVAL)));
if (rc != 0) /* be defensive: app will probably still work */
return;
LOG(THREAD, LOG_ASYNCH, 3, "\twe intercept signal %d\n", sig);
}

Expand Down Expand Up @@ -5036,7 +5067,8 @@ master_signal_handler_C(byte *xsp)
*/
if (dcontext == NULL && (sig == SIGSEGV || sig == SIGBUS) &&
(is_safe_read_ucxt(ucxt) ||
(!dynamo_initialized && global_try_except.try_except_state != NULL))) {
(!dynamo_initialized && global_try_except.try_except_state != NULL &&
global_try_tid == get_sys_thread_id()))) {
dcontext = GLOBAL_DCONTEXT;
}

Expand Down Expand Up @@ -5233,7 +5265,8 @@ master_signal_handler_C(byte *xsp)
#endif
if (is_safe_read_ucxt(ucxt) ||
(!dynamo_initialized && global_try_except.try_except_state != NULL) ||
dcontext->try_except.try_except_state != NULL) {
(dcontext != GLOBAL_DCONTEXT &&
dcontext->try_except.try_except_state != NULL)) {
/* handle our own TRY/EXCEPT */
try_except_context_t *try_cxt;
#ifdef HAVE_MEMINFO
Expand All @@ -5242,6 +5275,8 @@ master_signal_handler_C(byte *xsp)
SYSLOG_INTERNAL_WARNING_ONCE("(1+x) Handling our fault in a TRY at " PFX, pc);
#endif
LOG(THREAD, LOG_ALL, level, "TRY fault at " PFX "\n", pc);
ASSERT(dynamo_initialized || global_try_except.try_except_state == NULL ||
global_try_tid == get_sys_thread_id());
if (TEST(DUMPCORE_TRY_EXCEPT, DYNAMO_OPTION(dumpcore_mask)))
os_dump_core("try/except fault");

Expand All @@ -5251,8 +5286,9 @@ master_signal_handler_C(byte *xsp)
*/
break;
}
try_cxt = (dcontext != NULL) ? dcontext->try_except.try_except_state
: global_try_except.try_except_state;
try_cxt = (dcontext != NULL && dcontext != GLOBAL_DCONTEXT)
? dcontext->try_except.try_except_state
: global_try_except.try_except_state;
ASSERT(try_cxt != NULL);

/* The exception interception code did an ENTER so we must EXIT here */
Expand All @@ -5273,6 +5309,7 @@ master_signal_handler_C(byte *xsp)
DR_LONGJMP(&try_cxt->context, LONGJMP_EXCEPTION);
ASSERT_NOT_REACHED();
}
ASSERT(dcontext != GLOBAL_DCONTEXT); /* Had to be try/except. */

target = compute_memory_target(dcontext, pc, ucxt, siginfo, &is_write);

Expand Down Expand Up @@ -5858,9 +5895,15 @@ execute_native_handler(dcontext_t *dcontext, int sig, sigframe_rt_t *our_frame)
thread_sig_info_t *info;
LOG(THREAD, LOG_ASYNCH, 2, "%s for signal %d in thread " TIDFMT "\n", __FUNCTION__,
sig, get_sys_thread_id());
if (dcontext != NULL) {
if (dcontext != NULL && is_thread_signal_info_initialized(dcontext) &&
/* For the DR init thread, we don't place the app handlers into signal_info
* until DR starts running code and is equipped to deliver a managed signal.
* So during init, until dynamo_started is set, we deliver natively.
*/
dynamo_started) {
info = (thread_sig_info_t *)dcontext->signal_field;
} else {
dcontext = NULL; /* Clear for the not-yet-start or not-init cases above. */
if (atomic_read_bool(&multiple_handlers_present)) {
/* See i#1921 comment up top: we don't handle this. */
report_unhandleable_signal_and_exit(sig, "multiple native handlers");
Expand Down
5 changes: 4 additions & 1 deletion core/utils.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2010-2020 Google, Inc. All rights reserved.
* Copyright (c) 2010-2021 Google, Inc. All rights reserved.
* Copyright (c) 2017 ARM Limited. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
Expand Down Expand Up @@ -76,6 +76,9 @@
#include <stddef.h> /* for offsetof */

try_except_t global_try_except;
#ifdef UNIX
thread_id_t global_try_tid = INVALID_THREAD_ID;
#endif

int do_once_generation = 1;

Expand Down
2 changes: 2 additions & 0 deletions core/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,7 @@ extern mutex_t do_threshold_mutex;
if ((dc__local == NULL || dc__local == GLOBAL_DCONTEXT) && \
!dynamo_initialized) { \
try__except = &global_try_except; \
IF_UNIX(global_try_tid = get_sys_thread_id()); \
} else { \
if (dc__local == GLOBAL_DCONTEXT) \
dc__local = get_thread_private_dcontext(); \
Expand All @@ -1416,6 +1417,7 @@ extern mutex_t do_threshold_mutex;
} \
ASSERT(try__except != NULL); \
TRY(try__except, try_statement, EXCEPT(try__except, except_statement)); \
IF_UNIX(global_try_tid = INVALID_THREAD_ID); \
} while (0)

/* these use do..while w/ a local to avoid double-eval of dcontext */
Expand Down
6 changes: 4 additions & 2 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2850,8 +2850,10 @@ if (CLIENT_INTERFACE)
endif ()
endif ()
if (UNIX)
tobuild_api(api.detach_signal api/detach_signal.cpp "" "" OFF OFF)
link_with_pthread(api.detach_signal)
if (X64) # TODO i#4664: Fix crash in vsyscall hook under native threads.
tobuild_api(api.detach_signal api/detach_signal.cpp "" "" OFF OFF)
link_with_pthread(api.detach_signal)
endif ()
endif ()
if (NOT WIN32) # XXX i#2611: fix for Windows
if (X64)
Expand Down
51 changes: 44 additions & 7 deletions suite/tests/api/detach_signal.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2020 Google, Inc. All rights reserved.
* Copyright (c) 2011-2021 Google, Inc. All rights reserved.
* Copyright (c) 2003-2008 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -79,12 +79,18 @@ handle_signal(int signal, siginfo_t *siginfo, ucontext_t *ucxt)
};
int res = sigprocmask(SIG_BLOCK, NULL, &actual_mask);
assert(res == 0);
sigset_t expect_mask;
memcpy(&expect_mask, &handler_mask, sizeof(expect_mask));
sigaddset(&expect_mask, signal);
sigaddset(&expect_mask, SIGUSR1);
sigaddset(&expect_mask, SIGURG);
assert(memcmp(&expect_mask, &actual_mask, sizeof(expect_mask)) == 0);
sigset_t expect_mask1;
memcpy(&expect_mask1, &handler_mask, sizeof(expect_mask1));
sigaddset(&expect_mask1, signal);
sigaddset(&expect_mask1, SIGUSR1);
sigaddset(&expect_mask1, SIGURG);
/* We also have init-time signal tests with a different mask. */
sigset_t expect_mask2;
memcpy(&expect_mask2, &handler_mask, sizeof(expect_mask2));
sigaddset(&expect_mask2, signal);
sigaddset(&expect_mask2, SIGUSR2);
assert(memcmp(&expect_mask1, &actual_mask, sizeof(expect_mask1)) == 0 ||
memcmp(&expect_mask2, &actual_mask, sizeof(expect_mask2)) == 0);

count++;
SIGLONGJMP(mark, count);
Expand All @@ -96,9 +102,40 @@ sideline_spinner(void *arg)
unsigned int idx = (unsigned int)(uintptr_t)arg;
if (dr_app_running_under_dynamorio())
print("ERROR: thread %d should NOT be under DynamoRIO\n", idx);

if (idx == 0) {
/* Delay attach to help test i#4640 where a signal arrives in a native thread
* during DR takeover.
*/
sigset_t delay_attach_mask;
sigemptyset(&delay_attach_mask);
sigaddset(&delay_attach_mask, SIGUSR2); /* DR's takeover signal. */
int res = sigprocmask(SIG_SETMASK, &delay_attach_mask, NULL);
assert(res == 0);
}

VPRINT("%d signaling sideline_ready\n", idx);
signal_cond_var(sideline_ready[idx]);

if (idx == 0) {
/* Spend some time generating signals while SIGUSR2 is blocked to try and
* generate some after DR starts takeover and puts its own handler in place,
* but before it can take us over.
*/
for (int i = 0; i < 10000; i++) {
if (SIGSETJMP(mark) == 0) {
*(int *)arg = 42; /* SIGSEGV */
}
if (SIGSETJMP(mark) == 0) {
pthread_kill(pthread_self(), SIGURG);
}
}
sigset_t clear_mask;
sigemptyset(&clear_mask);
int res = sigprocmask(SIG_SETMASK, &clear_mask, NULL);
assert(res == 0);
}

VPRINT("%d waiting for continue\n", idx);
wait_cond_var(sideline_continue);
if (!dr_app_running_under_dynamorio())
Expand Down

0 comments on commit 568d7a6

Please sign in to comment.