/
signal.c
6338 lines (5934 loc) · 252 KB
/
signal.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* **********************************************************
* Copyright (c) 2011-2016 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/*
* signal.c - dynamorio signal handler
*/
#include <errno.h>
#undef errno
#include "signal_private.h" /* pulls in globals.h for us, in right order */
#ifdef LINUX
/* We want to build on older toolchains so we have our own copy of signal
* data structures
*/
# include "include/sigcontext.h"
# include "include/signalfd.h"
# include "../globals.h" /* after our sigcontext.h, to preclude bits/sigcontext.h */
#elif defined(MACOS)
# include "../globals.h" /* this defines _XOPEN_SOURCE for Mac */
# include <signal.h> /* after globals.h, for _XOPEN_SOURCE from os_exports.h */
#endif
#ifdef LINUX
# include <linux/sched.h>
#endif
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <ucontext.h>
#include <string.h> /* for memcpy and memset */
#include "os_private.h"
#include "../fragment.h"
#include "../fcache.h"
#include "../perfctr.h"
#include "arch.h"
#include "../monitor.h" /* for trace_abort */
#include "../link.h" /* for linking interrupted fragment_t */
#include "instr.h" /* to find target of SIGSEGV */
#include "decode.h" /* to find target of SIGSEGV */
#include "decode_fast.h" /* to handle self-mod code */
#include "../synch.h"
#include "../nudge.h"
#include "disassemble.h"
#include "ksynch.h"
#include "tls.h" /* tls_reinstate_selector */
#include "../translate.h"
#ifdef LINUX
# include "include/syscall.h"
#else
# include <sys/syscall.h>
#endif
#ifdef CLIENT_INTERFACE
# include "instrument.h"
#endif
#ifdef VMX86_SERVER
# include <errno.h>
#endif
#ifdef MACOS
/* The rest of this file uses the Linux spellings of these sigaction flags,
 * so on MacOS we alias them to the names that platform provides.
 */
# define SA_NOMASK SA_NODEFER
# define SA_ONESHOT SA_RESETHAND
#endif
/**** data structures ***************************************************/
/* The signal numbers are slightly different between operating systems.
* To support differing default actions, we have separate arrays, rather
* than indirecting to a single all-signals array.
*/
extern int default_action[];
/* We know that many signals are always asynchronous.
* Others, however, may be synchronous or may not -- e.g., another process
* could send us a SIGSEGV, and there is no way we can tell whether it
* was generated by a real memory fault or not. Thus we have to assume
* that we must not delay any SIGSEGV deliveries.
*/
extern bool can_always_delay[];
/* Returns true iff sig is one of the three itimer-driven signals
 * (SIGALRM, SIGVTALRM, SIGPROF).
 */
static inline bool
sig_is_alarm_signal(int sig)
{
    switch (sig) {
    case SIGALRM:
    case SIGVTALRM:
    case SIGPROF:
        return true;
    default:
        return false;
    }
}
/* Size of the alternate signal stack we register for each thread.
 * We do not use SIGSTKSZ because for things like code modification
 * we end up calling many core routines and so want more space
 * (though currently non-debug stack size == SIGSTKSZ (8KB)).
 */
/* This size is assumed in heap.c's threadunits_exit leak relaxation. */
#define SIGSTACK_SIZE DYNAMORIO_STACK_SIZE
/* sigaction flag indicating that the restorer field is valid; this flag is
 * not defined in our headers, so we define it here.
 */
#define SA_RESTORER 0x04000000
/* IS_RT_FOR_APP(info, sig): whether signal sig should be treated as using an
 * RT (SA_SIGINFO-style) frame for the app.
 * - Linux: the IF_X64_ELSE alternatives are a constant true and a lookup of the
 *   app's registered action for sig (presumably the first is chosen on 64-bit
 *   builds -- confirm against the IF_X64_ELSE definition).  In the lookup, if
 *   there is no app sigaction, it's RT, since that's our handler; otherwise it
 *   is RT iff the app's flags include SA_SIGINFO.
 * - MacOS: always true.
 */
#ifdef LINUX
# define IS_RT_FOR_APP(info, sig) \
IF_X64_ELSE(true, ((info)->app_sigaction[(sig)] == NULL ? true : \
(TEST(SA_SIGINFO, (info)->app_sigaction[(sig)]->flags))))
#elif defined(MACOS)
# define IS_RT_FOR_APP(info, sig) (true)
#endif
/* APP_HAS_SIGSTACK(info): true when the app has an alternate signal stack
 * registered, i.e., its recorded stack pointer is non-NULL and its flags are
 * not SS_DISABLE.
 *
 * Background on the kernel's behavior:
 * - The kernel sets size and sp to 0 for SS_DISABLE.
 * - When queried, it will hand back SS_ONSTACK only if the current xsp is
 *   inside the alt stack; otherwise, if an alt stack is registered, it will
 *   give flags of 0.
 * We do not support the "legacy stack switching" that uses the restorer field
 * as seen in kernel sources.
 */
#define APP_HAS_SIGSTACK(info) \
((info)->app_sigstack.ss_sp != NULL && (info)->app_sigstack.ss_flags != SS_DISABLE)
/* Extra space needed to put the signal frame on the app stack.  We include the
 * size of the extra padding potentially needed to align these structs.  We
 * assume the stack pointer is 4-aligned already, so we overestimate padding
 * size by the alignment minus 4.
 *
 * The *_FRAME_EXTRA macros include that alignment padding; the *_DATA_SIZE
 * macros are the size of the state data alone.  The XSTATE_* variants pick the
 * AVX or plain-fpstate value depending on YMM_ENABLED().
 */
#ifdef LINUX
/* An extra 4 for trailing FP_XSTATE_MAGIC2 */
# define AVX_FRAME_EXTRA (sizeof(struct _xstate) + AVX_ALIGNMENT - 4 + 4)
# define FPSTATE_FRAME_EXTRA (sizeof(struct _fpstate) + FPSTATE_ALIGNMENT - 4)
# define XSTATE_FRAME_EXTRA (YMM_ENABLED() ? AVX_FRAME_EXTRA : FPSTATE_FRAME_EXTRA)
# define AVX_DATA_SIZE (sizeof(struct _xstate) + 4)
# define FPSTATE_DATA_SIZE (sizeof(struct _fpstate))
# define XSTATE_DATA_SIZE (YMM_ENABLED() ? AVX_DATA_SIZE : FPSTATE_DATA_SIZE)
#elif defined(MACOS)
/* Currently assuming __darwin_mcontext_avx{32,64} is always used in the
 * frame, so no extra space is accounted for here.  If instead
 * __darwin_mcontext{32,64} is used (w/ just float and no AVX) on, say, older
 * machines or OSX versions, we'll have to revisit this.
 */
# define AVX_FRAME_EXTRA 0
# define FPSTATE_FRAME_EXTRA 0
# define XSTATE_FRAME_EXTRA 0
# define AVX_DATA_SIZE 0
# define FPSTATE_DATA_SIZE 0
# define XSTATE_DATA_SIZE 0
#endif
/* EMULATE_SIGMASK(info, sig): true when we emulate the blocked-mask state for
 * signal sig ourselves: either the intercept_all_signals option is set or we
 * intercept sig for this thread.
 *
 * If we only intercept a few signals, we leave the blocked state of
 * un-intercepted signals unchanged and stored in the kernel.  If we intercept
 * all (not quite yet: PR 297033, hence the need for this macro) we emulate the
 * mask for all.
 */
#define EMULATE_SIGMASK(info, sig) \
(DYNAMO_OPTION(intercept_all_signals) || (info)->we_intercept[(sig)])
/* i#27: custom data to pass to the child of a clone.
 * PR i#149/403015: the clone record is now passed via a new dstack.
 * Filled in by create_clone_record() (and optionally adjusted by
 * set_clone_record_fields()); read back through the get_clone_record_*()
 * accessors and by signal_thread_inherit().
 */
typedef struct _clone_record_t {
byte *dstack; /* dstack for new thread - allocated by parent thread */
#ifdef MACOS
/* XXX i#1403: once we have lower-level, earlier thread interception we can
 * likely switch to something closer to what we do on Linux.
 * This is used for bsdthread_create, where app_thread_xsp is NULL;
 * for vfork, app_thread_xsp is non-NULL and this is unused.
 */
void *thread_arg;
#endif
reg_t app_thread_xsp; /* app xsp preserved for new thread to use */
app_pc continuation_pc; /* pc at which the new thread resumes app execution */
thread_id_t caller_id; /* dcontext->owning_thread of the creating thread */
int clone_sysnum; /* dcontext->sys_num of the thread-creating syscall */
uint clone_flags; /* clone flags (sys_param0, or synthesized for Mac/vfork) */
thread_sig_info_t info; /* by-value snapshot of the parent's signal info */
thread_sig_info_t *parent_info; /* pointer to the parent's live signal info */
void *pcprofile_info; /* parent's dcontext->pcprofile_field */
#ifdef AARCHXX
/* To ensure we have the right value as of the point of the clone, we
 * store it here (we'll have races if we try to get it during new thread
 * init).
 */
reg_t app_stolen_value;
# ifndef AARCH64
dr_isa_mode_t isa_mode; /* parent's ISA mode at the time of the clone */
# endif
/* To ensure we have the right app lib tls base in child thread,
 * we store it here if necessary (clone w/o CLONE_SETTLS or vfork).
 * NULL when the child gets its own TLS.
 */
void *app_lib_tls_base;
#endif
/* we leave some padding at base of stack for dynamorio_clone
 * to store values
 */
reg_t for_dynamorio_clone[4];
} clone_record_t;
/* i#350: set up signal handler for safe_read/faults during init.
 * init_info holds the handler tables used by signal_init() before any
 * per-thread info exists; init_sigmask receives the signal mask that was in
 * effect before signal_init() unblocked all signals.
 */
static thread_sig_info_t init_info;
static kernel_sigset_t init_sigmask;
#ifdef DEBUG
/* Set to true once set_default_signal_action() has replaced a handler. */
static bool removed_sig_handler;
#endif
/**** function prototypes ***********************************************/
/* in x86.asm */
void
master_signal_handler(int sig, siginfo_t *siginfo, kernel_ucontext_t *ucxt);
static void
intercept_signal(dcontext_t *dcontext, thread_sig_info_t *info, int sig);
static void
signal_info_init_sigaction(dcontext_t *dcontext, thread_sig_info_t *info);
static void
signal_info_exit_sigaction(dcontext_t *dcontext, thread_sig_info_t *info,
bool other_thread);
static bool
execute_handler_from_cache(dcontext_t *dcontext, int sig, sigframe_rt_t *our_frame,
sigcontext_t *sc_orig, fragment_t *f
_IF_CLIENT(byte *access_address));
static bool
execute_handler_from_dispatch(dcontext_t *dcontext, int sig);
/* Execute default action from code cache and may terminate the process.
* If returns, the return value decides if caller should restore
* the untranslated context.
*/
static bool
execute_default_from_cache(dcontext_t *dcontext, int sig, sigframe_rt_t *frame,
sigcontext_t *sc_orig);
static void
execute_default_from_dispatch(dcontext_t *dcontext, int sig, sigframe_rt_t *frame);
static bool
handle_alarm(dcontext_t *dcontext, int sig, kernel_ucontext_t *ucxt);
static bool
handle_suspend_signal(dcontext_t *dcontext, kernel_ucontext_t *ucxt);
static bool
handle_nudge_signal(dcontext_t *dcontext, siginfo_t *siginfo, kernel_ucontext_t *ucxt);
static void
init_itimer(dcontext_t *dcontext, bool first);
static bool
set_actual_itimer(dcontext_t *dcontext, int which, thread_sig_info_t *info,
bool enable);
#ifdef DEBUG
static void
dump_sigset(dcontext_t *dcontext, kernel_sigset_t *set);
#endif
static bool
is_sys_kill(dcontext_t *dcontext, byte *pc, byte *xsp, siginfo_t *info);
/* Raw sigaction system call (SYS_sigaction on MacOS, SYS_rt_sigaction
 * otherwise).  On Linux x64 (non-VMX86_SERVER) a non-NULL act that lacks
 * SA_RESTORER is modified in place to carry dynamorio_sigreturn as restorer.
 * Returns the value of the underlying syscall.
 */
static inline int
sigaction_syscall(int sig, kernel_sigaction_t *act, kernel_sigaction_t *oact)
{
    int res;
#if defined(X64) && !defined(VMX86_SERVER) && defined(LINUX)
    /* PR 305020: must have SA_RESTORER for x64 */
    if (act != NULL) {
        if (!TEST(SA_RESTORER, act->flags)) {
            act->flags |= SA_RESTORER;
            act->restorer = (void (*)(void)) dynamorio_sigreturn;
        }
    }
#endif
    res = dynamorio_syscall(IF_MACOS_ELSE(SYS_sigaction,SYS_rt_sigaction),
                            4, sig, act, oact, sizeof(kernel_sigset_t));
    return res;
}
/* Raw SYS_sigaltstack: installs newstack (if non-NULL) and writes the
 * previous setting to oldstack (if non-NULL).  Returns the syscall result.
 */
static inline int
sigaltstack_syscall(const stack_t *newstack, stack_t *oldstack)
{
    int res = dynamorio_syscall(SYS_sigaltstack, 2, newstack, oldstack);
    return res;
}
/* Raw SYS_getitimer for timer `which`; the current value is written to val.
 * Returns the syscall result.
 */
static inline int
getitimer_syscall(int which, struct itimerval *val)
{
    int res = dynamorio_syscall(SYS_getitimer, 2, which, val);
    return res;
}
/* Raw SYS_setitimer for timer `which`: installs val and stores the previous
 * value in old.  Returns the syscall result.
 */
static inline int
setitimer_syscall(int which, struct itimerval *val, struct itimerval *old)
{
    int res = dynamorio_syscall(SYS_setitimer, 3, which, val, old);
    return res;
}
/* Raw signal-mask system call (SYS_sigprocmask on MacOS, SYS_rt_sigprocmask
 * otherwise).  sigsetsize is passed through as the fourth argument.
 * Returns the syscall result.
 */
static inline int
sigprocmask_syscall(int how, kernel_sigset_t *set, kernel_sigset_t *oset,
                    size_t sigsetsize)
{
    int res = dynamorio_syscall(IF_MACOS_ELSE(SYS_sigprocmask,SYS_rt_sigprocmask),
                                4, how, set, oset, sigsetsize);
    return res;
}
/* Installs an empty blocked-signal mask for the calling thread.  The mask
 * that was in effect beforehand is written to oset.
 */
static void
unblock_all_signals(kernel_sigset_t *oset)
{
    kernel_sigset_t none_blocked;

    kernel_sigemptyset(&none_blocked);
    sigprocmask_syscall(SIG_SETMASK, &none_blocked, oset, sizeof(none_blocked));
}
/* exported for stackdump.c
 *
 * Resets the disposition of sig to SIG_DFL and unblocks sig for the calling
 * thread.  Returns true iff the sigaction syscall succeeded (the result of
 * the unblock is not checked).
 */
bool
set_default_signal_action(int sig)
{
    kernel_sigaction_t dfl_act;
    kernel_sigset_t to_unblock;
    int res;

    /* arm the signal */
    memset(&dfl_act, 0, sizeof(dfl_act));
    dfl_act.handler = (handler_t) SIG_DFL;
    res = sigaction_syscall(sig, &dfl_act, NULL);
    DODEBUG({ removed_sig_handler = true; });

    /* If we're in our handler now, we have to unblock */
    kernel_sigemptyset(&to_unblock);
    kernel_sigaddset(&to_unblock, sig);
    sigprocmask_syscall(SIG_UNBLOCK, &to_unblock, NULL, sizeof(to_unblock));

    if (res != 0)
        return false;
    return true;
}
/* We assume that signal handlers will be shared most of the time
 * (pthreads shares them).
 * Rather than start out with the handler table in local memory and then
 * having to transfer to global, we just always use global.
 *
 * Frees a handler-table allocation of the given size back to the global
 * heap.  dcontext is accepted for symmetry with the allocator but is unused.
 */
static void
handler_free(dcontext_t *dcontext, void *p, size_t size)
{
global_heap_free(p, size HEAPACCT(ACCT_OTHER));
}
/* Allocates size bytes for handler tables from the global heap (never from
 * thread-local memory).  dcontext is unused.  Release with handler_free()
 * using the same size.
 */
static void *
handler_alloc(dcontext_t *dcontext, size_t size)
{
return global_heap_alloc(size HEAPACCT(ACCT_OTHER));
}
/**** top-level routines ***********************************************/
/* Returns whether itimers are shared among all threads of a process on the
 * running kernel.  The answer is computed once, by parsing the kernel version
 * out of /proc/version, and cached in function-local statics.
 *
 * Linux NPTL in kernel 2.6.12+ has POSIX-style itimers shared among threads.
 * If /proc/version cannot be opened, read, or parsed we assume the itimers
 * are thread-private.
 */
static bool
os_itimers_thread_shared(void)
{
    static bool itimers_shared;
    static bool cached = false;
    if (!cached) {
        /* assume not shared unless we positively identify a new-enough kernel */
        bool shared = false;
        file_t f = os_open("/proc/version", OS_OPEN_READ);
        if (f != INVALID_FILE) {
            char buf[128];
            int major = 0, minor = 0, rel = 0;
            /* Leave room for the terminator and only trust the bytes that
             * were actually read: a failed or short read must not let sscanf
             * walk over uninitialized stack contents.
             */
            ssize_t nread = os_read(f, buf, BUFFER_SIZE_ELEMENTS(buf) - 1);
            os_close(f);
            if (nread > 0) {
                int fields;
                buf[nread] = '\0';
                fields = sscanf(buf, "%*s %*s %d.%d.%d", &major, &minor, &rel);
                /* Some kernels report only two components (e.g., "3.0-ARCH"):
                 * treat a missing release number as 0 rather than giving up.
                 */
                if (fields >= 2) {
                    if (fields < 3)
                        rel = 0;
                    LOG(GLOBAL, LOG_ASYNCH, 1, "kernel version = %d.%d.%d\n",
                        major, minor, rel);
                    /* shared iff version >= 2.6.12 */
                    shared = (major >= 3 /* linux-3.0 or above */) ||
                        (major == 2 &&
                         (minor > 6 || (minor == 6 && rel >= 12)));
                }
            }
        }
        itimers_shared = shared;
        cached = true;
        LOG(GLOBAL, LOG_ASYNCH, 1, "itimers are %s\n",
            itimers_shared ? "thread-shared" : "thread-private");
    }
    return itimers_shared;
}
/* Process-wide initialization of the signal subsystem.
 * Sanity-checks platform layout assumptions, primes the cached itimer-sharing
 * answer, installs fault handlers usable before any thread is initialized,
 * unblocks all signals (saving the prior mask), and on Linux initializes
 * signalfd support.
 */
void
signal_init()
{
IF_LINUX(IF_X86_64(ASSERT(ALIGNED(offsetof(sigpending_t, xstate), AVX_ALIGNMENT))));
IF_MACOS(ASSERT(sizeof(kernel_sigset_t) == sizeof(__darwin_sigset_t)));
/* Called for its side effect of computing and caching the answer;
 * the return value is not needed here.
 */
os_itimers_thread_shared();
/* Set up a handler for safe_read (or other fault detection) during
 * DR init before thread is initialized.
 *
 * XXX: could set up a clone_record_t and pass to the initial
 * signal_thread_inherit() but that would require further code changes.
 * Could also call signal_thread_inherit to init this, but we don't want
 * to intercept timer signals, etc. before we're ready to handle them,
 * so we do a partial init.
 */
signal_info_init_sigaction(GLOBAL_DCONTEXT, &init_info);
intercept_signal(GLOBAL_DCONTEXT, &init_info, SIGSEGV);
intercept_signal(GLOBAL_DCONTEXT, &init_info, SIGBUS);
/* Unblock every signal, remembering the original mask in init_sigmask. */
unblock_all_signals(&init_sigmask);
IF_LINUX(signalfd_init());
}
/* Process-wide teardown of the signal subsystem: shuts down signalfd support
 * on Linux and, in debug builds with suitable logging enabled, reports the
 * total number of signals delivered.
 */
void
signal_exit()
{
    IF_LINUX(signalfd_exit());
#ifdef DEBUG
    if (stats->loglevel > 0) {
        if ((stats->logmask & (LOG_ASYNCH|LOG_STATS)) != 0) {
            LOG(GLOBAL, LOG_ASYNCH|LOG_STATS, 1,
                "Total signals delivered: %d\n", GLOBAL_STAT(num_signals));
        }
    }
#endif
}
/* Per-thread signal state setup.
 * Allocates and zeroes this thread's thread_sig_info_t and stores it in
 * dcontext->signal_field, creates the special heap used for pending-signal
 * records, registers an alternate signal stack (when HAVE_SIGALTSTACK),
 * clears the app's blocked-signal set, and initializes child_lock.
 * Initialization is only partial: signal_thread_inherit() must be called
 * afterward to set up the inherited/shared fields.
 */
void
signal_thread_init(dcontext_t *dcontext)
{
#ifdef HAVE_SIGALTSTACK
int rc;
#endif
thread_sig_info_t *info = HEAP_TYPE_ALLOC(dcontext, thread_sig_info_t,
ACCT_OTHER, PROTECTED);
/* all fields want to be initialized to 0 */
memset(info, 0, sizeof(thread_sig_info_t));
dcontext->signal_field = (void *) info;
/* Our special heap to avoid reentrancy problems, composed entirely of
 * sigpending_t units.
 * Note that it's fine to have the special heap do page-at-a-time
 * committing, which does not use locks (unless it triggers a reset!),
 * but if we need a new unit that will grab a lock: we try to
 * avoid that by limiting the # of pending alarm signals (PR 596768).
 */
info->sigheap = special_heap_init(sizeof(sigpending_t),
false /* cannot have any locking */,
false /* -x */,
true /* persistent */);
#ifdef HAVE_SIGALTSTACK
/* Set up alternate stack.
 * i#552: we may terminate the process without freeing the stack, so we
 * stack_alloc it to exempt it from the memory leak check.
 * The size is subtracted from stack_alloc's result to obtain ss_sp
 * (presumably because stack_alloc hands back the high end of the region,
 * as create_clone_record's comments describe -- confirm).
 */
info->sigstack.ss_sp = (char *) stack_alloc(SIGSTACK_SIZE, NULL) - SIGSTACK_SIZE;
info->sigstack.ss_size = SIGSTACK_SIZE;
/* kernel will set xsp to sp+size to grow down from there, we don't have to */
info->sigstack.ss_flags = 0;
/* Install our stack; the app's previous setting is saved in app_sigstack. */
rc = sigaltstack_syscall(&info->sigstack, &info->app_sigstack);
ASSERT(rc == 0);
LOG(THREAD, LOG_ASYNCH, 1, "signal stack is "PFX" - "PFX"\n",
info->sigstack.ss_sp, info->sigstack.ss_sp + info->sigstack.ss_size);
/* app_sigstack dealt with below, based on parentage */
#endif
kernel_sigemptyset(&info->app_sigblocked);
ASSIGN_INIT_LOCK_FREE(info->child_lock, child_lock);
/* someone must call signal_thread_inherit() to finish initialization:
 * for first thread, called from initial setup; else, from new_thread_setup
 * or share_siginfo_after_take_over.
 */
}
/* i#27: create custom data to pass to the child of a clone
 * since we can't rely on being able to find the caller, or that
 * its syscall data is still valid, once in the child.
 *
 * i#149/ PR 403015: The clone record is passed to the new thread via the dstack
 * created for it.  Unlike before, where the child thread would create its own
 * dstack, now the parent thread creates the dstack.  Also, switches app stack
 * to dstack.
 *
 * XXX i#1403: for Mac we want to eventually do lower-level earlier interception
 * of threads, but for now we're later and higher-level, intercepting the user
 * thread function on the new thread's stack.  We ignore app_thread_xsp.
 *
 * Called in the parent thread.  On return, *app_thread_xsp (when
 * app_thread_xsp is non-NULL) has been redirected to point into the new
 * dstack, just below the record.  Returns the record as an opaque pointer.
 * On MacOS with a NULL app_thread_xsp the record is instead heap-allocated and
 * thread_func/thread_arg are stored in it.
 */
void *
#ifdef MACOS
create_clone_record(dcontext_t *dcontext, reg_t *app_thread_xsp,
app_pc thread_func, void *thread_arg)
#else
create_clone_record(dcontext_t *dcontext, reg_t *app_thread_xsp)
#endif
{
clone_record_t *record;
byte *dstack = stack_alloc(DYNAMORIO_STACK_SIZE, NULL);
LOG(THREAD, LOG_ASYNCH, 1,
"create_clone_record: dstack for new thread is "PFX"\n", dstack);
#ifdef MACOS
if (app_thread_xsp == NULL) {
/* No app stack pointer to redirect: the record lives on the global heap
 * rather than at the top of the dstack.
 */
record = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, clone_record_t,
ACCT_THREAD_MGT, true/*prot*/);
record->app_thread_xsp = 0;
record->continuation_pc = thread_func;
record->thread_arg = thread_arg;
record->clone_flags = CLONE_THREAD | CLONE_VM | CLONE_SIGHAND | SIGCHLD;
} else {
#endif
/* Note, the stack grows to low memory addr, so dstack points to the high
 * end of the allocated stack region.  So, we must subtract to get space for
 * the clone record.
 */
record = (clone_record_t *) (dstack - sizeof(clone_record_t));
record->app_thread_xsp = *app_thread_xsp;
/* asynch_target is set in dispatch() prior to calling pre_system_call(). */
record->continuation_pc = dcontext->asynch_target;
record->clone_flags = dcontext->sys_param0;
#ifdef MACOS
}
#endif
LOG(THREAD, LOG_ASYNCH, 1, "allocated clone record: "PFX"\n", record);
record->dstack = dstack;
record->caller_id = dcontext->owning_thread;
record->clone_sysnum = dcontext->sys_num;
/* Snapshot the parent's signal info by value, and also keep a pointer to
 * the parent's live copy.
 */
record->info = *((thread_sig_info_t *)dcontext->signal_field);
record->parent_info = (thread_sig_info_t *) dcontext->signal_field;
record->pcprofile_info = dcontext->pcprofile_field;
#ifdef AARCHXX
record->app_stolen_value = get_stolen_reg_val(get_mcontext(dcontext));
# ifndef AARCH64
record->isa_mode = dr_get_isa_mode(dcontext);
# endif
/* If the child thread shares the same TLS with parent by not setting
 * CLONE_SETTLS or vfork, we put the TLS base here and clear the
 * thread register in new_thread_setup, so that DR can distinguish
 * this case from normal pthread thread creation.
 */
record->app_lib_tls_base = (!TEST(CLONE_SETTLS, record->clone_flags)) ?
os_get_app_tls_base(dcontext, TLS_REG_LIB) : NULL;
#endif
LOG(THREAD, LOG_ASYNCH, 1,
"create_clone_record: thread "TIDFMT", pc "PFX"\n",
record->caller_id, record->continuation_pc);
#ifdef MACOS
if (app_thread_xsp != NULL) {
#endif
/* Set the thread stack to point to the dstack, below the clone record.
 * Note: it's glibc who sets up the arg to the thread start function;
 * the kernel just does a fork + stack swap, so we can get away w/ our
 * own stack swap if we restore before the glibc asm code takes over.
 */
/* i#754: set stack to be XSTATE aligned for saving YMM registers */
ASSERT(ALIGNED(XSTATE_ALIGNMENT, REGPARM_END_ALIGN));
*app_thread_xsp = ALIGN_BACKWARD(record, XSTATE_ALIGNMENT);
#ifdef MACOS
}
#endif
return (void *) record;
}
/* This is to support dr_create_client_thread().
 * Overwrites the app stack pointer, continuation pc, syscall number and clone
 * flags stored in an existing clone record; all other fields are untouched.
 */
void
set_clone_record_fields(void *record, reg_t app_thread_xsp, app_pc continuation_pc,
                        uint clone_sysnum, uint clone_flags)
{
    clone_record_t *target = (clone_record_t *) record;

    ASSERT(target != NULL);
    target->clone_flags = clone_flags;
    target->clone_sysnum = clone_sysnum;
    target->continuation_pc = continuation_pc;
    target->app_thread_xsp = app_thread_xsp;
}
/* i#149/PR 403015: The clone record is passed to the new thread by placing it
 * at the bottom of the dstack, i.e., the high memory.  So the new thread gets
 * it from the base of the dstack.  The dstack is then set as the app stack.
 *
 * Given a stack pointer xsp inside the new thread's dstack, returns the clone
 * record located at the top (highest addresses) of that dstack.
 *
 * CAUTION: don't use a lot of stack in this routine as it gets invoked on the
 *          dstack from new_thread_setup - this is because this routine assumes
 *          no more than a page of dstack has been used so far since the clone
 *          system call was done.
 */
void *
get_clone_record(reg_t xsp)
{
    byte *stack_top;
    clone_record_t *rec;

    /* xsp should be in a dstack, i.e., dynamorio heap. */
    ASSERT(is_dynamo_address((app_pc) xsp));

    /* The (size of the clone record +
     *      stack used by new_thread_start (only for setting up priv_mcontext_t) +
     *      stack used by new_thread_setup before calling get_clone_record())
     * is less than a page.  This is verified by the assert below.  If it does
     * exceed a page, it won't happen at random during runtime, but in a
     * predictable way during development, which will be caught by the assert.
     * The current usage is about 800 bytes for clone_record +
     * sizeof(priv_mcontext_t) + few words in new_thread_setup before
     * get_clone_record() is called.
     */
    stack_top = (byte *) ALIGN_FORWARD(xsp, PAGE_SIZE);
    /* The record occupies the last sizeof(clone_record_t) bytes below the top. */
    rec = ((clone_record_t *) stack_top) - 1;

    /* The computed top and the dstack stored in the record should agree. */
    ASSERT(stack_top == rec->dstack);
#ifdef MACOS
    ASSERT(rec->app_thread_xsp != 0); /* else it's not in dstack */
#endif
    return (void *) rec;
}
/* i#149/PR 403015: App xsp is passed to the new thread via the clone record. */
reg_t
get_clone_record_app_xsp(void *record)
{
ASSERT(record != NULL);
return ((clone_record_t *) record)->app_thread_xsp;
}
#ifdef MACOS
void *
get_clone_record_thread_arg(void *record)
{
ASSERT(record != NULL);
return ((clone_record_t *) record)->thread_arg;
}
#endif
byte *
get_clone_record_dstack(void *record)
{
ASSERT(record != NULL);
return ((clone_record_t *) record)->dstack;
}
#ifdef AARCHXX
reg_t
get_clone_record_stolen_value(void *record)
{
ASSERT(record != NULL);
return ((clone_record_t *) record)->app_stolen_value;
}
# ifndef AARCH64
uint /* dr_isa_mode_t but we have a header ordering problem */
get_clone_record_isa_mode(void *record)
{
ASSERT(record != NULL);
return ((clone_record_t *) record)->isa_mode;
}
# endif
void
set_thread_register_from_clone_record(void *record)
{
/* If record->app_lib_tls_base is not NULL, it means the parent
* thread did not setup TLS for the child, and we need clear the
* thread register.
*/
if (((clone_record_t *)record)->app_lib_tls_base != NULL)
write_thread_register(NULL);
}
/* If the record carries an app lib TLS base, installs it as this thread's
 * TLS_REG_LIB base; otherwise does nothing.
 */
void
set_app_lib_tls_base_from_clone_record(dcontext_t *dcontext, void *record)
{
    void *shared_base = ((clone_record_t *) record)->app_lib_tls_base;

    if (shared_base == NULL)
        return;
    /* child and parent share the same TLS */
    os_set_app_tls_base(dcontext, TLS_REG_LIB, shared_base);
}
#endif
/* Initializes info's app_sigaction, restorer_valid, and we_intercept fields:
 * the two tables are freshly allocated via handler_alloc() and zeroed, and
 * every restorer_valid entry is filled with all-ones bytes (i.e., -1).
 */
static void
signal_info_init_sigaction(dcontext_t *dcontext, thread_sig_info_t *info)
{
    const size_t sigact_bytes = SIGARRAY_SIZE * sizeof(kernel_sigaction_t *);
    const size_t intercept_bytes = SIGARRAY_SIZE * sizeof(bool);

    info->app_sigaction = (kernel_sigaction_t **) handler_alloc(dcontext, sigact_bytes);
    memset(info->app_sigaction, 0, sigact_bytes);

    memset(&info->restorer_valid, -1, SIGARRAY_SIZE * sizeof(info->restorer_valid[0]));

    info->we_intercept = (bool *) handler_alloc(dcontext, intercept_bytes);
    memset(info->we_intercept, 0, intercept_bytes);
}
/* Cleans up info's app_sigaction, restorer_valid, and we_intercept fields.
 *
 * When other_thread is false, the kernel's signal dispositions are first put
 * back for every signal number in [1, MAX_SIGNUM]:
 *  - if the app registered an action, that action is reinstalled (with its
 *    handler forced to SIG_IGN when the whole process is exiting);
 *  - else if we intercepted the signal, it is reset to SIG_DFL.
 * When other_thread is true no syscalls are made.  In both cases every
 * per-signal app action and the two tables themselves are freed.
 */
static void
signal_info_exit_sigaction(dcontext_t *dcontext, thread_sig_info_t *info,
                           bool other_thread)
{
    int i;
    kernel_sigaction_t act;
    memset(&act, 0, sizeof(act));
    act.handler = (handler_t) SIG_DFL;
    kernel_sigemptyset(&act.mask); /* does mask matter for SIG_DFL? */
    for (i = 1; i <= MAX_SIGNUM; i++) {
        if (!other_thread) {
            if (info->app_sigaction[i] != NULL) {
                /* restore to old handler, but not if exiting whole
                 * process: else may get itimer during cleanup, so we
                 * set to SIG_IGN (we'll have to fix once we impl detach)
                 */
                if (dynamo_exited) {
                    /* The single syscall below installs the modified action;
                     * issuing it here as well would just repeat the same call.
                     */
                    info->app_sigaction[i]->handler = (handler_t) SIG_IGN;
                }
                LOG(THREAD, LOG_ASYNCH, 2, "\trestoring "PFX" as handler for %d\n",
                    info->app_sigaction[i]->handler, i);
                sigaction_syscall(i, info->app_sigaction[i], NULL);
            } else if (info->we_intercept[i]) {
                /* restore to default */
                LOG(THREAD, LOG_ASYNCH, 2, "\trestoring SIG_DFL as handler for %d\n", i);
                sigaction_syscall(i, &act, NULL);
            }
        }
        if (info->app_sigaction[i] != NULL) {
            handler_free(dcontext, info->app_sigaction[i],
                         sizeof(kernel_sigaction_t));
        }
    }
    handler_free(dcontext, info->app_sigaction,
                 SIGARRAY_SIZE * sizeof(kernel_sigaction_t *));
    handler_free(dcontext, info->we_intercept, SIGARRAY_SIZE * sizeof(bool));
}
/* Called once a new thread's dcontext is created.
* Inherited and shared fields are set up here.
* The clone_record contains the continuation pc, which is returned.
*/
app_pc
signal_thread_inherit(dcontext_t *dcontext, void *clone_record)
{
app_pc res = NULL;
clone_record_t *record = (clone_record_t *) clone_record;
thread_sig_info_t *info = (thread_sig_info_t *) dcontext->signal_field;
kernel_sigaction_t oldact;
int i, rc;
if (record != NULL) {
app_pc continuation_pc = record->continuation_pc;
LOG(THREAD, LOG_ASYNCH, 1,
"continuation pc is "PFX"\n", continuation_pc);
LOG(THREAD, LOG_ASYNCH, 1,
"parent tid is "TIDFMT", parent sysnum is %d(%s), clone flags="PIFX"\n",
record->caller_id, record->clone_sysnum,
#ifdef SYS_vfork
(record->clone_sysnum == SYS_vfork) ? "vfork" :
#endif
(IF_LINUX(record->clone_sysnum == SYS_clone ? "clone" :)
IF_MACOS(record->clone_sysnum == SYS_bsdthread_create ? "bsdthread_create":)
"unexpected"), record->clone_flags);
#ifdef SYS_vfork
if (record->clone_sysnum == SYS_vfork) {
/* The above clone_flags argument is bogus.
SYS_vfork doesn't have a free register to keep the hardcoded value
see /usr/src/linux/arch/i386/kernel/process.c */
/* CHECK: is this the only place real clone flags are needed? */
record->clone_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;
}
#endif
/* handlers are either inherited or shared */
if (TEST(CLONE_SIGHAND, record->clone_flags)) {
/* need to share table of handlers! */
LOG(THREAD, LOG_ASYNCH, 2, "sharing signal handlers with parent\n");
info->shared_app_sigaction = true;
info->shared_refcount = record->info.shared_refcount;
info->shared_lock = record->info.shared_lock;
info->app_sigaction = record->info.app_sigaction;
info->we_intercept = record->info.we_intercept;
mutex_lock(info->shared_lock);
(*info->shared_refcount)++;
#ifdef DEBUG
for (i = 1; i <= MAX_SIGNUM; i++) {
if (info->app_sigaction[i] != NULL) {
LOG(THREAD, LOG_ASYNCH, 2, "\thandler for signal %d is "PFX"\n",
i, info->app_sigaction[i]->handler);
}
}
#endif
mutex_unlock(info->shared_lock);
} else {
/* copy handlers */
LOG(THREAD, LOG_ASYNCH, 2, "inheriting signal handlers from parent\n");
info->app_sigaction = (kernel_sigaction_t **)
handler_alloc(dcontext, SIGARRAY_SIZE * sizeof(kernel_sigaction_t *));
memset(info->app_sigaction, 0, SIGARRAY_SIZE * sizeof(kernel_sigaction_t *));
for (i = 1; i <= MAX_SIGNUM; i++) {
info->restorer_valid[i] = -1; /* clear cache */
if (record->info.app_sigaction[i] != NULL) {
info->app_sigaction[i] = (kernel_sigaction_t *)
handler_alloc(dcontext, sizeof(kernel_sigaction_t));
memcpy(info->app_sigaction[i], record->info.app_sigaction[i],
sizeof(kernel_sigaction_t));
LOG(THREAD, LOG_ASYNCH, 2, "\thandler for signal %d is "PFX"\n",
i, info->app_sigaction[i]->handler);
}
}
info->we_intercept = (bool *)
handler_alloc(dcontext, SIGARRAY_SIZE * sizeof(bool));
memcpy(info->we_intercept, record->info.we_intercept,
SIGARRAY_SIZE * sizeof(bool));
mutex_lock(&record->info.child_lock);
record->info.num_unstarted_children--;
mutex_unlock(&record->info.child_lock);
/* this should be safe since parent should wait for us */
mutex_lock(&record->parent_info->child_lock);
record->parent_info->num_unstarted_children--;
mutex_unlock(&record->parent_info->child_lock);
}
/* itimers are either private or shared */
if (TEST(CLONE_THREAD, record->clone_flags) && os_itimers_thread_shared()) {
ASSERT(record->info.shared_itimer);
LOG(THREAD, LOG_ASYNCH, 2, "sharing itimers with parent\n");
info->shared_itimer = true;
info->shared_itimer_refcount = record->info.shared_itimer_refcount;
info->shared_itimer_underDR = record->info.shared_itimer_underDR;
info->shared_itimer_lock = record->info.shared_itimer_lock;
info->itimer = record->info.itimer;
acquire_recursive_lock(info->shared_itimer_lock);
(*info->shared_itimer_refcount)++;
release_recursive_lock(info->shared_itimer_lock);
/* shared_itimer_underDR will be incremented in start_itimer() */
} else {
info->shared_itimer = false;
init_itimer(dcontext, false/*!first thread*/);
}
if (APP_HAS_SIGSTACK(info)) {
/* parent was under our control, so the real sigstack we see is just
* the parent's being inherited -- clear it now
*/
memset(&info->app_sigstack, 0, sizeof(stack_t));
info->app_sigstack.ss_flags |= SS_DISABLE;
}
/* rest of state is never shared.
* app_sigstack should already be in place, when we set up our sigstack
* we asked for old sigstack.
* FIXME: are current pending or blocked inherited?
*/
res = continuation_pc;
#ifdef MACOS
if (record->app_thread_xsp != 0) {
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, record, clone_record_t,
ACCT_THREAD_MGT, true/*prot*/);
}
#endif
} else {
/* initialize in isolation */
if (!dynamo_initialized) {
/* Undo the early-init handler */
signal_info_exit_sigaction(GLOBAL_DCONTEXT, &init_info,
false/*!other_thread*/);
/* Undo the unblock-all */
sigprocmask_syscall(SIG_SETMASK, &init_sigmask, NULL, sizeof(init_sigmask));
DOLOG(2, LOG_ASYNCH, {
LOG(THREAD, LOG_ASYNCH, 2, "initial app signal mask:\n");
dump_sigset(dcontext, &init_sigmask);
});
}
if (APP_HAS_SIGSTACK(info)) {
/* parent was NOT under our control, so the real sigstack we see is
* a real sigstack that was present before we took control
*/
LOG(THREAD, LOG_ASYNCH, 1, "app already has signal stack "PFX" - "PFX"\n",
info->app_sigstack.ss_sp,
info->app_sigstack.ss_sp + info->app_sigstack.ss_size);
}
signal_info_init_sigaction(dcontext, info);
info->shared_itimer = false; /* we'll set to true if a child is created */
init_itimer(dcontext, true/*first*/);
if (DYNAMO_OPTION(intercept_all_signals)) {
/* PR 304708: to support client signal handlers without
* the complexity of per-thread and per-signal callbacks
* we always intercept all signals. We also check here
* for handlers the app registered before our init.
*/
for (i=1; i<=MAX_SIGNUM; i++) {
/* cannot intercept KILL or STOP */
if (i != SIGKILL && i != SIGSTOP &&
/* FIXME PR 297033: we don't support intercepting DEFAULT_STOP /
* DEFAULT_CONTINUE signals. Once we add support, update the
* dr_register_signal_event() comments.
*/
default_action[i] != DEFAULT_STOP &&
default_action[i] != DEFAULT_CONTINUE)
intercept_signal(dcontext, info, i);
}
} else {
/* we intercept the following signals ourselves: */
intercept_signal(dcontext, info, SIGSEGV);
/* PR 313665: look for DR crashes on unaligned memory or mmap bounds */
intercept_signal(dcontext, info, SIGBUS);
/* PR 212090: the signal we use to suspend threads */
intercept_signal(dcontext, info, SUSPEND_SIGNAL);
#ifdef PAPI
/* use SIGPROF for updating gui so it can be distinguished from SIGVTALRM */
intercept_signal(dcontext, info, SIGPROF);
#endif
/* SIGVTALRM is only used with pc profiling. It interferes with PAPI,
* so arm this signal only if necessary.
*/
if (INTERNAL_OPTION(profile_pcs)) {
intercept_signal(dcontext, info, SIGVTALRM);
}
#ifdef CLIENT_INTERFACE
intercept_signal(dcontext, info, SIGALRM);
#endif
#ifdef SIDELINE
intercept_signal(dcontext, info, SIGCHLD);
#endif
/* i#61/PR 211530: the signal we use for nudges */
intercept_signal(dcontext, info, NUDGESIG_SIGNUM);
/* process any handlers app registered before our init */
for (i=1; i<=MAX_SIGNUM; i++) {
if (info->we_intercept[i]) {
/* intercept_signal already stored pre-existing handler */
continue;
}
rc = sigaction_syscall(i, NULL, &oldact);
ASSERT(rc == 0
/* Workaround for PR 223720, which was fixed in ESX4.0 but
* is present in ESX3.5 and earlier: vmkernel treats
* 63 and 64 as invalid signal numbers.
*/
IF_VMX86(|| (i >= 63 && rc == -EINVAL))
);
if (rc == 0 &&
oldact.handler != (handler_t) SIG_DFL &&
oldact.handler != (handler_t) master_signal_handler) {
/* could be master_ if inherited */
/* FIXME: if app removes handler, we'll never remove ours */
intercept_signal(dcontext, info, i);
info->we_intercept[i] = false;
}
}
}
/* should be 1st thread */
if (get_num_threads() > 1)
ASSERT_NOT_REACHED();
/* FIXME: any way to recover if not 1st thread? */
res = NULL;
}
unblock_all_signals(&info->app_sigblocked);
DOLOG(2, LOG_ASYNCH, {
LOG(THREAD, LOG_ASYNCH, 2, "thread's initial app signal mask:\n");
dump_sigset(dcontext, &info->app_sigblocked);
});
/* only when SIGVTALRM handler is in place should we start itimer (PR 537743) */
if (INTERNAL_OPTION(profile_pcs)) {
/* even if the parent thread exits, we can use a pointer to its