/* **********************************************************
 * Copyright (c) 2010-2017 Google, Inc. All rights reserved.
 * Copyright (c) 2008-2010 VMware, Inc. All rights reserved.
 * **********************************************************/
/* Dr. Memory: the memory debugger
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License, and no later version.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/***************************************************************************
 * slowpath.c: Dr. Memory memory read/write slowpath handling
 */
#include "dr_api.h"
#include "drutil.h"
#include "drmemory.h"
#include "instru.h"
#include "slowpath.h"
#include "slowpath_arch.h"
#include "spill.h"
#include "fastpath.h"
#include "stack.h"
#include "alloc_drmem.h"
#include "heap.h"
#include "alloc.h"
#include "report.h"
#include "shadow.h"
#include "syscall.h"
#include "replace.h"
#include "perturb.h"
#include "annotations.h"
#ifdef TOOL_DR_HEAPSTAT
# include "../drheapstat/staleness.h"
#endif
#include "pattern.h"
#include <stddef.h>
#include "asm_utils.h"
#ifdef STATISTICS
/* per-opcode counts */
uint64 slowpath_count[OP_LAST+1];
/* per-opsz counts */
uint64 slowpath_sz1;
uint64 slowpath_sz2;
uint64 slowpath_sz4;
uint64 slowpath_sz8;
uint64 slowpath_sz10;
uint64 slowpath_sz16;
uint64 slowpath_szOther;
/* PR 423757: periodic stats dump */
uint next_stats_dump;
uint num_faults;
uint num_slowpath_faults;
#endif
#ifdef STATISTICS
uint slowpath_executions;
uint medpath_executions;
uint read_slowpath;
uint write_slowpath;
uint push_slowpath;
uint pop_slowpath;
uint read_fastpath;
uint write_fastpath;
uint push_fastpath;
uint pop_fastpath;
uint read4_fastpath;
uint write4_fastpath;
uint push4_fastpath;
uint pop4_fastpath;
uint slow_instead_of_fast;
uint heap_header_exception;
uint tls_exception;
uint alloca_exception;
uint strlen_exception;
uint strlen_uninit_exception;
uint strcpy_exception;
uint rawmemchr_exception;
uint strmem_unaddr_exception;
uint strrchr_exception;
uint andor_exception;
uint bitfield_const_exception;
uint bitfield_xor_exception;
uint loader_DRlib_exception;
uint cppexcept_DRlib_exception;
uint fldfst_exception;
uint heap_func_ref_ignored;
uint reg_dead;
uint reg_xchg;
uint reg_spill;
uint reg_spill_slow;
uint reg_spill_own;
uint reg_spill_used_in_bb;
uint reg_spill_unused_in_bb;
uint addressable_checks_elided;
uint aflags_saved_at_top;
uint xl8_shared;
uint xl8_not_shared_reg_conflict;
uint xl8_not_shared_scratch_conflict;
uint xl8_not_shared_disp_too_big;
uint xl8_not_shared_unaligned;
uint xl8_not_shared_mem2mem;
uint xl8_not_shared_offs;
uint xl8_not_shared_slowpaths;
uint xl8_shared_slowpath_instrs;
uint xl8_shared_slowpath_count;
uint slowpath_unaligned;
uint slowpath_8_at_border;
uint app_instrs_fastpath;
uint app_instrs_no_dup;
uint xl8_app_for_slowpath;
uint num_bbs;
#endif
/***************************************************************************
 * Registers
 */
/* To relocate restores to the common slowpath yet still support
 * site-specific scratch registers we have restore patterns for
 * every possible combination
 */
/* XXX i#1726: update for ARM */
/* variable-reg: reg1 and reg2 */
enum {
    SPILL_REG_NONE, /* !used and !dead */
    SPILL_REG_EAX, /* this reg is spilled to tls */
    SPILL_REG_ECX,
    SPILL_REG_EDX,
    SPILL_REG_EBX,
    SPILL_REG_EAX_DEAD, /* this reg is dead */
    SPILL_REG_ECX_DEAD,
    SPILL_REG_EDX_DEAD,
    SPILL_REG_EBX_DEAD,
    SPILL_REG_NUM,
};
enum {
    SPILL_REG3_NOSPILL,
    SPILL_REG3_SPILL,
    SPILL_REG3_NUM,
};
enum {
    SPILL_EFLAGS_NOSPILL,
    SPILL_EFLAGS_5_NOEAX,
    SPILL_EFLAGS_6_NOEAX,
    SPILL_EFLAGS_5_EAX,
    SPILL_EFLAGS_6_EAX,
    SPILL_EFLAGS_NUM,
};
#define SPILL_REG3_REG DR_REG_XCX
/* The 4 indices are: reg1, reg2, reg3, eflags */
byte *shared_slowpath_entry_local[SPILL_REG_NUM][SPILL_REG_NUM][SPILL_REG3_NUM][SPILL_EFLAGS_NUM];
/* For whole-bb spilling, we do not restore eflags, but reg3 can be anything */
byte *shared_slowpath_entry_global[SPILL_REG_NUM][SPILL_REG_NUM][SPILL_REG_NUM];
byte *shared_slowpath_region;
byte *shared_slowpath_entry;
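/* Illustrative sketch (not part of the original source): the gencode entry
 * point for a given spill configuration would be looked up along these lines
 * (the particular index choices here are hypothetical):
 *
 *   byte *entry = shared_slowpath_entry_local
 *       [SPILL_REG_EAX]          // reg1 spilled to tls
 *       [SPILL_REG_ECX_DEAD]     // reg2 dead, so no restore needed
 *       [SPILL_REG3_SPILL]       // reg3 (xcx) spilled
 *       [SPILL_EFLAGS_5_NOEAX];  // eflags spill variant
 *
 * so each combination of site-specific restore patterns gets its own entry.
 */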
/* adjust_esp's shared fast and slow paths pointers are below */
/* Lock for updating gencode later */
static void *gencode_lock;
/***************************************************************************
 * ISA
 */
bool
instr_propagatable_dsts(instr_t *inst)
{
    int i;
    bool res = false;
    int opc = instr_get_opcode(inst);
    for (i = 0; i < instr_num_dsts(inst); i++) {
        opnd_t opnd = instr_get_dst(inst, i);
        /* i#1543, i#243: we now shadow xmm regs and propagate and mirror xmm
         * operations (at least most of them: work in progress).
         */
        if ((opnd_is_reg(opnd) && reg_is_shadowed(opc, opnd_get_reg(opnd))) ||
            opnd_is_memory_reference(opnd)) {
            res = true;
        } else {
            res = false;
            break;
        }
    }
    return res;
}
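/* Illustrative usage sketch (not part of the original source): the return
 * value is true only when every dest is a shadowed register or a memref.
 * Assuming hypothetical helpers propagate_shadow()/check_defined_instead():
 *
 *   if (instr_propagatable_dsts(&inst))
 *       propagate_shadow(&inst);        // can store definedness in the dsts
 *   else
 *       check_defined_instead(&inst);   // no propagatable dst: check srcs
 */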
#ifdef TOOL_DR_MEMORY
/* drcontext can be NULL if the operand is an immed int.
 *
 * For mmx, xmm, or ymm sources, returns just the lower reg_t bits.
 * XXX: we'll need to return the full value for handling OP_pand, etc.!
 * For now we only use this to get shift amounts for which we can ignore
 * all high bits.
 */
bool
get_cur_src_value(void *drcontext, instr_t *inst, uint i, reg_t *val)
{
    dr_mcontext_t mc; /* do not init whole thing: memset is expensive */
    opnd_t src = instr_get_src(inst, i);
    if (val == NULL)
        return false;
    if (opnd_is_immed_int(src)) {
        *val = (reg_t) opnd_get_immed_int(src);
        return true;
    }
    ASSERT(drcontext != NULL, "need drcontext for non-immed opnd");
    mc.size = sizeof(mc);
    mc.flags = DR_MC_CONTROL|DR_MC_INTEGER; /* don't need xmm */
    dr_get_mcontext(drcontext, &mc);
    if (opnd_is_memory_reference(src)) {
        app_pc addr = opnd_compute_address(src, &mc);
        size_t sz = opnd_size_in_bytes(opnd_get_size(src));
        if (sz > sizeof(*val))
            return false;
        return (safe_read(addr, sz, val));
    } else if (opnd_is_reg(src)) {
        byte val32[sizeof(dr_ymm_t)];
        reg_id_t reg = opnd_get_reg(src);
        if (!reg_is_gpr(reg)) {
            mc.flags |= DR_MC_MULTIMEDIA;
            dr_get_mcontext(drcontext, &mc);
        }
        if (!reg_get_value_ex(reg, &mc, val32))
            return false;
        *val = *(reg_t*)val32;
        return true;
    }
    return false;
}
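/* Illustrative usage sketch (not part of the original source): fetching a
 * shift amount where all high bits can be ignored, assuming the count is
 * source operand 0 of the shift instr:
 *
 *   reg_t amt;
 *   if (get_cur_src_value(drcontext, inst, 0, &amt))
 *       amt &= 31;   // hypothetical masking for a 32-bit shift
 */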
static inline bool
opnds_overlap(opnd_t op1, opnd_t op2)
{
    /* XXX: should we check overlap on memory opnd? */
    return (opnd_same(op1, op2) ||
            (opnd_is_reg(op1) && opnd_is_reg(op2) && opnd_share_reg(op1, op2)));
}
bool
instrs_share_opnd(instr_t *in1, instr_t *in2)
{
    int i, j;
    for (i = 0; i < instr_num_srcs(in1); i++) {
        for (j = 0; j < instr_num_srcs(in2); j++) {
            if (opnds_overlap(instr_get_src(in1, i), instr_get_src(in2, j)))
                return true;
        }
        for (j = 0; j < instr_num_dsts(in2); j++) {
            if (opnds_overlap(instr_get_src(in1, i), instr_get_dst(in2, j)))
                return true;
        }
    }
    for (i = 0; i < instr_num_dsts(in1); i++) {
        for (j = 0; j < instr_num_srcs(in2); j++) {
            if (opnds_overlap(instr_get_dst(in1, i), instr_get_src(in2, j)))
                return true;
        }
        for (j = 0; j < instr_num_dsts(in2); j++) {
            if (opnds_overlap(instr_get_dst(in1, i), instr_get_dst(in2, j)))
                return true;
        }
    }
    return false;
}
#endif /* TOOL_DR_MEMORY */
/***************************************************************************
 * Definedness and Addressability Checking
 */
void
shadow_combine_init(shadow_combine_t *comb, instr_t *inst, uint opcode, uint max)
{
    uint i;
    uint init_shadow = SHADOW_DEFINED;
    if (opc_dst_subreg_nonlow(opcode) &&
        inst != NULL && instr_num_dsts(inst) == 1) {
        opnd_t dst = instr_get_dst(inst, 0);
        if (opnd_is_reg(dst)) {
            reg_id_t reg = opnd_get_reg(dst);
            uint opsz = opnd_size_in_bytes(opnd_get_size(dst));
            uint regsz = opnd_size_in_bytes(reg_get_size(reg));
            if (opsz < regsz) {
                /* For opcodes that write to only part of the reg and leave the
                 * rest unchanged and don't write to just the bottom of the reg,
                 * we have to pass every byte of the register shadow to
                 * map_src_to_dst(). We need to incorporate the prior reg
                 * shadow values, which we can't solely do later as we need to
                 * distinguish what was written by the opcode. By using
                 * BITLEVEL we ensure that shadow_combine() will clobber this
                 * rather than OR it in.
                 */
                init_shadow = SHADOW_DEFINED_BITLEVEL;
            }
        }
    }
    comb->dst = comb->raw;
    /* Initialize to defined so we can aggregate operands as we go.
     * This works with no-source instrs (rdtsc, etc.)
     * This also makes small->large work out w/o any special processing
     * (movsx, movzx, cwde, etc.): but XXX: are there any src/dst size
     * mismatches where we do NOT want to set dst bytes beyond count
     * of src bytes to defined?
     */
    for (i = 0; i < max; i++)
        comb->dst[i] = init_shadow;
    comb->eflags = SHADOW_DEFINED;
    comb->inst = inst;
    comb->opcode = opcode;
}
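/* Illustrative sketch (not part of the original source) of the typical call
 * sequence:
 *
 *   shadow_combine_t comb;
 *   shadow_combine_init(&comb, &inst, instr_get_opcode(&inst),
 *                       OPND_SHADOW_ARRAY_LEN);
 *   // ...integrate each source, then assign to each dest...
 *
 * For an opcode that writes only a non-low slice of its dest reg (e.g.,
 * pextr*-style sub-xmm writes), comb.dst starts as SHADOW_DEFINED_BITLEVEL so
 * that untouched bytes are later refilled from the register's prior shadow
 * rather than OR-combined with it.
 */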
#ifdef TOOL_DR_MEMORY
/* Adds a new source operand's value to the array of shadow vals in
 * comb->dst to be assigned to the destination.
 */
static void
integrate_register_shadow(shadow_combine_t *comb INOUT, int opnum,
                          reg_id_t reg, uint shadow, bool pushpop)
{
    uint i, sz;
    if (reg == REG_EFLAGS) {
        /* eflags propagates to all bytes */
        uint dstsz;
        accum_shadow(&comb->eflags, SHADOW_DWORD2BYTE(shadow, 0));
        if (instr_num_dsts(comb->inst) == 0)
            return;
        dstsz = opnd_size_in_bytes(opnd_get_size(instr_get_dst(comb->inst, 0)));
        for (i = 0; i < dstsz; i++)
            accum_shadow(&comb->dst[i], SHADOW_DWORD2BYTE(shadow, i));
        return;
    }
    /* PR 426162: ignore stack register source if instr also has memref
     * using same register as addressing register, since memref will do a
     * definedness check for us, and if the reg is undefined we do NOT want
     * to propagate it as it will end up in a regular dest, say pop into a
     * reg, when that dest should only depend on the memref (since on
     * reported error we set addressing register to defined).
     */
    if (pushpop && reg_overlap(reg, DR_REG_XSP))
        return;
    if (integrate_register_shadow_arch(comb, opnum, reg, shadow, pushpop))
        return;
    if (opc_dst_subreg_nonlow(comb->opcode)) {
        /* Deliberately bypassing opnd_get_size() so we can pick the right bits out
         * of the reg for opcodes that are sub-xmm but pull from higher than offset
         * 0 (e.g., pextr*).
         */
        ASSERT(comb->opnd_valid, "need opnd valid for subreg-nonzero opcodes");
        sz = opnd_size_in_bytes(reg_get_size(reg));
    } else
        sz = opnd_size_in_bytes(opnd_get_size(comb->opnd));
    for (i = 0; i < sz; i++)
        map_src_to_dst(comb, opnum, i, SHADOW_DWORD2BYTE(shadow, i));
}
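/* Illustrative sketch (not part of the original source): integrating a 4-byte
 * gpr source whose per-byte shadow values are packed into a single uint:
 *
 *   uint shadow = get_shadow_register(DR_REG_XCX);
 *   integrate_register_shadow(&comb, 0, DR_REG_XCX, shadow, false);
 *
 * which routes SHADOW_DWORD2BYTE(shadow, 0..3) into the comb.dst slots chosen
 * by map_src_to_dst().
 */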
/* Assigns the array of source shadow_vals to the destination register shadow */
static void
assign_register_shadow(shadow_combine_t *comb INOUT, int opnum, opnd_t opnd,
                       reg_id_t reg, bool pushpop)
{
    uint shift = 0;
    uint sz, i;
    /* Here we need to de-mux from the side-by-side dests in the array
     * into individual register dests.
     * We also have to shift dsts that do NOT simply go into the lowest slot.
     */
    if (assign_register_shadow_arch(comb, opnum, opnd, reg, pushpop, &shift))
        return;
    if (pushpop && reg_overlap(reg, DR_REG_XSP))
        return;
    if (opc_dst_subreg_nonlow(comb->opcode)) {
        uint shadow = get_shadow_register(reg);
        /* Deliberately bypassing opnd_get_size() so we can pick the right bits out
         * of the reg for opcodes that are sub-xmm but pull from higher than offset
         * 0 (e.g., pextr*).
         */
        sz = opnd_size_in_bytes(reg_get_size(reg));
        /* Replace the BITLEVEL markers with the register's prior shadow value */
        for (i = 0; i < sz; i++) {
            if (comb->dst[i] == SHADOW_DEFINED_BITLEVEL)
                comb->dst[i] = SHADOW_DWORD2BYTE(shadow, i);
        }
    } else
        sz = opnd_size_in_bytes(opnd_get_size(opnd));
    shift *= sz;
    register_shadow_set_byte(reg, reg_offs_in_dword(reg), comb->dst[shift + 0]);
    if (sz > 1) {
        ASSERT(reg_offs_in_dword(reg) == 0, "invalid reg offs");
        for (i = 1; i < sz; i++) {
            ASSERT(shift + i < OPND_SHADOW_ARRAY_LEN, "shadow_vals overflow");
            register_shadow_set_byte(reg, i, comb->dst[shift + i]);
        }
    }
}
void
register_shadow_mark_defined(reg_id_t reg, size_t sz)
{
    uint i;
    if (sz == 4 && reg_is_gpr(reg))
        register_shadow_set_dword(reg, SHADOW_DWORD_DEFINED);
    else {
        for (i = 0; i < sz; i++)
            register_shadow_set_byte(reg, i, SHADOW_DEFINED);
    }
}
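/* Illustrative usage sketch (not part of the original source): marking a
 * 4-byte register fully defined after an event the tool knows defines it,
 * taking the single-dword fast case above:
 *
 *   register_shadow_mark_defined(DR_REG_XAX, 4);
 */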
bool
opnd_uses_nonignorable_memory(opnd_t opnd)
{
    /* XXX: we could track ebp/r11 and try to determine when not used as frame ptr */
    return (opnd_is_memory_reference(opnd) &&
            /* pattern mode */
            (options.pattern == 0 ? true : pattern_opnd_needs_check(opnd)) &&
            /* stack access */
            (options.check_stack_access ||
             !opnd_is_base_disp(opnd) ||
             (reg_to_pointer_sized(opnd_get_base(opnd)) != DR_REG_XSP &&
              reg_to_pointer_sized(opnd_get_base(opnd)) != REG_FRAME_PTR) ||
             opnd_get_index(opnd) != REG_NULL ||
             opnd_is_far_memory_reference(opnd)));
}
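/* Illustrative sketch (not part of the original source): when
 * options.check_stack_access is off, a plain frame access such as
 * "mov eax, [ebp-8]" is ignorable, while "mov eax, [ebp+ecx*4]" still
 * qualifies because of its index register.
 */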
/* Called by slow_path() after initial decode. Expected to free inst. */
bool
slow_path_without_uninitialized(void *drcontext, dr_mcontext_t *mc, instr_t *inst,
                                app_loc_t *loc, size_t instr_sz)
{
    opnd_t opnd, memop = opnd_create_null();
    int opc, i, num_srcs, num_dsts;
    uint sz;
    bool pushpop_stackop;
    uint flags;
    ASSERT(!options.check_uninitialized, "should not be called");
    opc = instr_get_opcode(inst);
    num_srcs = (IF_X86_ELSE(opc == OP_lea, false)) ? 0 : num_true_srcs(inst, mc);
    for (i = 0; i < num_srcs; i++) {
        opnd = instr_get_src(inst, i);
        if (opnd_uses_nonignorable_memory(opnd)) {
            opnd = adjust_memop(inst, opnd, false, &sz, &pushpop_stackop);
            if (pushpop_stackop && options.check_stack_bounds)
                flags = MEMREF_PUSHPOP | MEMREF_IS_READ;
            else
                flags = MEMREF_CHECK_ADDRESSABLE | MEMREF_IS_READ;
            memop = opnd;
            check_mem_opnd_nouninit(opc, flags, loc, opnd, sz, mc);
        }
    }
    num_dsts = num_true_dsts(inst, mc);
    for (i = 0; i < num_dsts; i++) {
        opnd = instr_get_dst(inst, i);
        if (opnd_uses_nonignorable_memory(opnd)) {
            opnd = adjust_memop(inst, opnd, true, &sz, &pushpop_stackop);
            if (pushpop_stackop && options.check_stack_bounds)
                flags = MEMREF_PUSHPOP | MEMREF_WRITE;
            else
                flags = MEMREF_CHECK_ADDRESSABLE;
            memop = opnd;
            check_mem_opnd_nouninit(opc, flags, loc, opnd, sz, mc);
        }
    }
    instr_free(drcontext, inst);
    /* call this last after freeing inst in case it does a synchronous flush */
    slow_path_xl8_sharing(loc, instr_sz, memop, mc);
    return true;
}
#endif /* TOOL_DR_MEMORY */
/* Does everything in C code, except for handling non-push/pop writes to esp.
 *
 * General design:
 * + comb.dst[] array holds the shadow values for the destinations.
 *   If there are multiple dests, they are laid out side-by-side.
 * + Shadow values are combined via combine_shadows() which does OR-combining.
 *
 * First we walk the sources and add each in turn to the shadow array via:
 * + integrate_register_shadow() for regs
 * + handle_mem_ref() with MEMREF_USE_VALUES for memrefs
 * Both call map_src_to_dst() which determines where in
 * the dst shadow array to put each source, thus handling arbitrary
 * opcodes with weird data movements.
 *
 * Then we walk the dests and call handle_mem_ref() or
 * assign_register_shadow() on each, which pulls from comb.dst[]'s shadow vals.
 *
 * XXX: can we change handle_mem_ref() and map_src_to_dst() to not operate on
 * one byte at a time, so we can make the slowpath more closely match the
 * fastpath code, and thus make it easier to transition opcodes to the fastpath?
 */
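/* Illustrative sketch of the OR-combining mentioned above (not part of the
 * original source): accumulating any undefined source byte leaves the
 * combined value undefined:
 *
 *   uint shadow = SHADOW_DEFINED;
 *   accum_shadow(&shadow, SHADOW_DEFINED);    // still SHADOW_DEFINED
 *   accum_shadow(&shadow, SHADOW_UNDEFINED);  // now SHADOW_UNDEFINED
 */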
bool
slow_path_with_mc(void *drcontext, app_pc pc, app_pc decode_pc, dr_mcontext_t *mc)
{
    instr_t inst;
    int opc;
#ifdef TOOL_DR_MEMORY
    opnd_t opnd;
    int i, num_srcs, num_dsts;
    uint sz;
    shadow_combine_t comb;
    bool check_definedness, pushpop, pushpop_stackop;
    bool check_srcs_after;
    bool always_defined;
    opnd_t memop = opnd_create_null();
    size_t instr_sz;
    cls_drmem_t *cpt = (cls_drmem_t *) drmgr_get_cls_field(drcontext, cls_idx_drmem);
#endif
    app_loc_t loc;
#if defined(STATISTICS) && defined(TOOL_DR_MEMORY)
    /* PR 423757: periodic stats dump, both for server apps that don't
     * close cleanly and to get stats out prior to overflow.
     */
    int execs = atomic_add32_return_sum((volatile int *)&slowpath_executions, 1);
    if (execs == next_stats_dump) {
        /* still racy: could skip a dump, but that's ok */
        ATOMIC_ADD32(next_stats_dump, options.stats_dump_interval);
        dr_fprintf(f_global, "\n**** per-%dK-slowpath stats dump:\n",
                   options.stats_dump_interval/1000);
        dump_statistics();
    }
#endif
    pc_to_loc(&loc, pc);
    /* Locally-spilled and whole-bb-spilled (PR 489221) registers have
     * already been restored in shared_slowpath, so we can properly
     * emulate addresses referenced. We can't restore whole-bb-spilled
     * here b/c we don't have the bb tag. Eflags may not be restored
     * but we don't rely on them here.
     */
    /* for jmp-to-slowpath optimization where we xl8 to get app pc (PR 494769)
     * we always pass NULL for decode_pc
     */
    if (decode_pc == NULL) {
        /* not using safe_read since in cache */
        byte *ret_pc = (byte *) get_own_tls_value(SPILL_SLOT_2);
        ASSERT(pc == NULL, "invalid params");
        ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
        /* If the ret pc is a jmp, we know to walk forward, bypassing
         * spills, to find the app instr (we assume app jmp never
         * needs slowpath). If using a cloned app instr, then ret pc
         * points directly there. Since we want to skip the clone and
         * the jmp, we always skip the instr at ret pc when returning.
         */
        pc = decode_next_pc(drcontext, ret_pc);
        ASSERT(pc != NULL, "invalid stored app instr");
        set_own_tls_value(SPILL_SLOT_2, (reg_t) pc);
        if (*ret_pc == 0xe9) {
            /* walk forward to find the app pc */
            instr_init(drcontext, &inst);
            do {
                instr_reset(drcontext, &inst);
                decode_pc = pc;
                pc = decode(drcontext, decode_pc, &inst);
                ASSERT(pc != NULL, "invalid app instr copy");
            } while (instr_is_spill(&inst) || instr_is_restore(&inst));
            instr_reset(drcontext, &inst);
        } else
            decode_pc = ret_pc;
        /* if we want the app addr later, we'll have to translate to get it */
        loc.u.addr.valid = false;
        loc.u.addr.pc = decode_pc;
        pc = NULL;
    } else
        ASSERT(!options.single_arg_slowpath, "single_arg_slowpath error");
#ifdef TOOL_DR_MEMORY
    if (decode_pc != NULL) {
        if (medium_path_arch(decode_pc, &loc, mc))
            return true;
    }
#endif /* TOOL_DR_MEMORY */
    instr_init(drcontext, &inst);
#ifdef TOOL_DR_MEMORY
    instr_sz = decode(drcontext, decode_pc, &inst) - decode_pc;
#else
    decode(drcontext, decode_pc, &inst);
#endif
    ASSERT(instr_valid(&inst), "invalid instr");
    opc = instr_get_opcode(&inst);
    slowpath_update_app_loc_arch(opc, decode_pc, &loc);
#ifdef STATISTICS
    STATS_INC(slowpath_count[opc]);
    {
        uint bytes = instr_memory_reference_size(&inst);
        if (bytes == 0) {
            if (instr_num_dsts(&inst) > 0 &&
                !opnd_is_pc(instr_get_dst(&inst, 0)) &&
                !opnd_is_instr(instr_get_dst(&inst, 0)))
                bytes = opnd_size_in_bytes(opnd_get_size(instr_get_dst(&inst, 0)));
            else if (instr_num_srcs(&inst) > 0 &&
                     !opnd_is_pc(instr_get_src(&inst, 0)) &&
                     !opnd_is_instr(instr_get_src(&inst, 0)))
                bytes = opnd_size_in_bytes(opnd_get_size(instr_get_src(&inst, 0)));
            else
                bytes = 0;
        }
        if (bytes == 1)
            STATS_INC(slowpath_sz1);
        else if (bytes == 2)
            STATS_INC(slowpath_sz2);
        else if (bytes == 4)
            STATS_INC(slowpath_sz4);
        else if (bytes == 8)
            STATS_INC(slowpath_sz8);
        else if (bytes == 10)
            STATS_INC(slowpath_sz10);
        else if (bytes == 16)
            STATS_INC(slowpath_sz16);
        else
            STATS_INC(slowpath_szOther);
    }
#endif
    DOLOG(3, {
        LOG(3, "\nslow_path "PFX": ", pc);
        instr_disassemble(drcontext, &inst, LOGFILE_GET(drcontext));
        if (instr_num_dsts(&inst) > 0 &&
            opnd_is_memory_reference(instr_get_dst(&inst, 0))) {
            umbra_shadow_memory_info_t info;
            umbra_shadow_memory_info_init(&info);
            LOG(3, " | 0x%x",
                shadow_get_byte(&info,
                                opnd_compute_address(instr_get_dst(&inst, 0),
                                                     mc)));
        }
        LOG(3, "\n");
    });
#ifdef TOOL_DR_HEAPSTAT
    return slow_path_for_staleness(drcontext, mc, &inst, &loc);
#else
    if (!options.check_uninitialized)
        return slow_path_without_uninitialized(drcontext, mc, &inst, &loc, instr_sz);
    LOG(4, "shadow registers prior to instr:\n");
    DOLOG(4, { print_shadow_registers(); });
    /* We need to do the following:
     * - check addressability of all memory operands
     * - check definedness of all source operands if:
     *   o no GPR or memory dest (=> no way to store definedness)
     *   o if options.check_uninit_non_moves is on and this is not just a move
     * - check definedness of certain source operands:
     *   o base or index register to a memory ref
     *     (includes esp/ebp operand to a push/pop)
     *   o ecx to stringop
     *   o ebp to enter/leave
     * - combine definedness of source operands and store
     *   in dest operand shadows
     * - if a pop, make stack slot(s) unaddressable
     *
     * Usually there's one destination we need to transfer
     * definedness to. If there are more, we can fit them side by
     * side in our 8-dword-capacity comb->dst array.
     */
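    /* Illustrative sketch (not part of the original source): for a
     * hypothetical two-dest instr with 4-byte operands, comb.dst would hold
     * dest0's shadow bytes in slots 0-3 and dest1's in slots 4-7, with
     * assign_register_shadow() de-muxing each dest out via its shifted offset.
     */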
    check_definedness = instr_check_definedness(&inst);
    always_defined = result_is_always_defined(&inst, false/*us*/);
    pushpop = opc_is_push(opc) || opc_is_pop(opc);
    check_srcs_after = instr_needs_all_srcs_and_vals(&inst);
    if (check_srcs_after) {
        /* We need to check definedness of addressing registers, and so we do
         * our normal src loop but we do not check undefinedness or combine
         * sources. Below we pass pointers to later in comb->dst to
         * check_mem_opnd() and integrate_register_shadow(), causing the 2
         * sources to be laid out side-by-side in comb->dst.
         */
        ASSERT(instr_num_srcs(&inst) == 2, "and/or special handling error");
        check_definedness = false;
        IF_DEBUG(comb.opsz = 0;) /* for asserts below */
    }
    shadow_combine_init(&comb, &inst, opc, OPND_SHADOW_ARRAY_LEN);
    num_srcs = (IF_X86_ELSE(opc == OP_lea, false)) ? 2 : num_true_srcs(&inst, mc);
 check_srcs:
    for (i = 0; i < num_srcs; i++) {
        if (IF_X86_ELSE(opc == OP_lea, false)) {
            /* special case: treat base+index as propagatable sources;
             * code below can handle REG_NULL
             */
            if (i == 0)
                opnd = opnd_create_reg(opnd_get_base(instr_get_src(&inst, 0)));
            else
                opnd = opnd_create_reg(opnd_get_index(instr_get_src(&inst, 0)));
        } else {
            opnd = instr_get_src(&inst, i);
        }
        if (opnd_is_memory_reference(opnd)) {
            int flags = 0;
            opnd = adjust_memop(&inst, opnd, false, &sz, &pushpop_stackop);
            /* do not combine srcs if checking after */
            if (check_srcs_after) {
                ASSERT(i == 0 || sz >= comb.opsz, "check-after needs >=-size srcs");
                comb.dst = &comb.raw[i*sz]; /* shift the dst in the array */
            }
            shadow_combine_set_opnd(&comb, opnd, sz);
            /* check_mem_opnd() checks definedness of base registers,
             * addressability of address, and if necessary checks definedness
             * and adjusts addressability of address.
             */
            if (pushpop_stackop)
                flags |= MEMREF_PUSHPOP;
            if (always_defined) {
                LOG(2, "marking and/or/xor with 0/~0/self as defined @"PFX"\n", pc);
                /* w/o MEMREF_USE_VALUES, handle_mem_ref() will use SHADOW_DEFINED */
            } else if (check_definedness || always_check_definedness(&inst, i)) {
                flags |= MEMREF_CHECK_DEFINEDNESS;
                if (options.leave_uninit)
                    flags |= MEMREF_USE_VALUES;
            } else {
                /* If we're checking, to avoid further errors we do not
                 * propagate the shadow vals (and thus we essentially
                 * propagate SHADOW_DEFINED).
                 * Conveniently all the large operand sizes always
                 * have check_definedness since they involve fp or sse.
                 */
                ASSERT(sz <= sizeof(comb.raw), "internal shadow val error");
                flags |= MEMREF_USE_VALUES;
            }
            memop = opnd;
            check_mem_opnd(opc, flags, &loc, opnd, sz, mc, i, &comb);
        } else if (opnd_is_reg(opnd)) {
            reg_id_t reg = opnd_get_reg(opnd);
            if (reg_is_shadowed(opc, reg)) {
                uint shadow = get_shadow_register(reg);
                if (opc_dst_subreg_nonlow(opc)) {
                    /* We need the whole reg as this opcode references high up */
                    sz = opnd_size_in_bytes(reg_get_size(reg));
                } else
                    sz = opnd_size_in_bytes(opnd_get_size(opnd));
                /* do not combine srcs if checking after */
                if (check_srcs_after) {
                    ASSERT(i == 0 || sz >= comb.opsz, "check-after needs >=-size srcs");
                    comb.dst = &comb.raw[i*sz]; /* shift the dst in the array */
                }
                shadow_combine_set_opnd(&comb, opnd, sz);
                if (always_defined) {
                    /* if result defined regardless, don't propagate (is
                     * equivalent to propagating SHADOW_DEFINED) or check */
                } else if (check_definedness || always_check_definedness(&inst, i)) {
                    check_register_defined(drcontext, reg, &loc, sz, mc, &inst);
                    if (options.leave_uninit) {
                        integrate_register_shadow(&comb, i, reg, shadow, pushpop);
                    }
                } else {
                    /* See above: we only propagate when not checking */
                    integrate_register_shadow(&comb, i, reg, shadow, pushpop);
                }
            } /* else always defined */
        } else /* always defined */
            ASSERT(opnd_is_immed_int(opnd) || opnd_is_pc(opnd), "unexpected opnd");
        DOLOG(4, {
            int j;
            LOG(4, "shadows after src %d ", i);
            opnd_disassemble(drcontext, opnd, LOGFILE_GET(drcontext));
            LOG(4, ": ");
            for (j = 0; j < OPND_SHADOW_ARRAY_LEN; j++)
                LOG(4, "%d", comb.raw[j]);
            LOG(4, ", eflags: %d\n", comb.eflags);
        });
    }
    /* eflags source */
    if (TESTANY(EFLAGS_READ_ARITH, instr_get_eflags(&inst, DR_QUERY_DEFAULT))) {
        uint shadow = get_shadow_eflags();
        /* for check_srcs_after we leave comb.dst where it last was */
        if (always_defined) {
            /* if result defined regardless, don't propagate (is
             * equivalent to propagating SHADOW_DEFINED) or check */
        } else if (check_definedness) {
            check_register_defined(drcontext, REG_EFLAGS, &loc, 1, mc, &inst);
            if (options.leave_uninit)
                integrate_register_shadow(&comb, 0, REG_EFLAGS, shadow, pushpop);
        } else {
            /* See above: we only propagate when not checking */
            integrate_register_shadow(&comb, 0, REG_EFLAGS, shadow, pushpop);
        }
    } else if (num_srcs == 0) {
        /* do not propagate from comb.dst since dst size could be large (i#458)
         * (fxsave, etc.)
         */
        always_defined = true;
    }
    if (check_srcs_after)
        comb.dst = comb.raw; /* restore */
    if (check_srcs_after) {
        /* turn back on for dsts */
        check_definedness = instr_check_definedness(&inst);
        if (check_andor_sources(drcontext, mc, &inst, &comb, decode_pc + instr_sz)) {
            if (TESTANY(EFLAGS_WRITE_ARITH,
                        instr_get_eflags(&inst, DR_QUERY_INCLUDE_ALL))) {
                /* We have to redo the eflags propagation. map_src_to_dst() combined
                 * all the laid-out sources, some of which we made defined in
                 * check_andor_sources.
                 */
                comb.eflags = SHADOW_DEFINED;
                for (i = 0; i < OPND_SHADOW_ARRAY_LEN; i++)
                    accum_shadow(&comb.eflags, comb.dst[i]);
            }
        }
        if (check_definedness) {
            /* If we need to report undefs we have to go back */
            bool all_defined = true;
            for (i = 0; i < OPND_SHADOW_ARRAY_LEN; i++) {
                if (comb.dst[i] != SHADOW_DEFINED) {
                    all_defined = false;
                    break;
                }
            }
            if (!all_defined) {
                /* We do not bother to suppress reporting the particular bytes that
                 * may have been "defined" due to 0/1 in the other operand since
                 * doing so would require duplicating/extracting all the reporting
                 * logic above for regs and in handle_mem_ref(): our goto here is
                 * slightly less ugly.
                 */
                LOG(4, "and/or not all defined and need to check def: restarting\n");
                /* Avoid recursing, and don't do the side-by-side layout this time */
                check_srcs_after = false;
                for (i = 0; i < OPND_SHADOW_ARRAY_LEN; i++)
                    comb.dst[i] = SHADOW_DEFINED;
                comb.eflags = SHADOW_DEFINED;
                goto check_srcs;
            }
        }
    }
    num_dsts = num_true_dsts(&inst, mc);
    for (i = 0; i < num_dsts; i++) {
        opnd = instr_get_dst(&inst, i);
        if (opnd_is_memory_reference(opnd)) {
            int flags = MEMREF_WRITE;
            opnd = adjust_memop(&inst, opnd, true, &sz, &pushpop_stackop);
            if (pushpop_stackop)
                flags |= MEMREF_PUSHPOP;
            if (cpt->mem2fpmm_source != NULL && cpt->mem2fpmm_pc == pc) {
                /* i#471 fld;fstp heuristic: fstp's dest was marked bitlevel to
                 * get us here. Do a special-case propagate.
                 */
                umbra_shadow_memory_info_t info;
                LOG(3, "propagating fld;fstp from "PFX"\n", cpt->mem2fpmm_source);
                /* We use a fake movs in handle_mem_ref() (can't just do
                 * shadow_copy_range() b/c we need to check base reg for
                 * definedness, check for addressability, etc.)
                 */
                umbra_shadow_memory_info_init(&info);
                shadow_set_byte(&info, cpt->mem2fpmm_dest, cpt->mem2fpmm_prev_shadow);
                comb.movs_addr = cpt->mem2fpmm_source;
                flags |= MEMREF_MOVS | MEMREF_USE_VALUES;
                cpt->mem2fpmm_source = NULL;
            } else if (always_defined) {
                /* w/o MEMREF_USE_VALUES, handle_mem_ref() will use SHADOW_DEFINED */
            } else if (check_definedness) {
                flags |= MEMREF_CHECK_DEFINEDNESS;
                if (options.leave_uninit)
                    flags |= MEMREF_USE_VALUES;
                /* since checking, we mark as SHADOW_DEFINED (see above) */
            } else {
                ASSERT(sz <= sizeof(comb.raw), "internal shadow val error");
                flags |= MEMREF_USE_VALUES;
            }
            /* check addressability, and propagate
             * we arranged xchg/xadd to not need shifting; nothing else does either.
             */
            memop = opnd;
            check_mem_opnd(opc, flags, &loc, opnd, sz, mc, i, &comb);
        } else if (opnd_is_reg(opnd)) {
            reg_id_t reg = opnd_get_reg(opnd);
            if (reg_is_shadowed(opc, reg)) {
                assign_register_shadow(&comb, i, opnd, reg, pushpop);
            }
        } else
            ASSERT(opnd_is_immed_int(opnd) || opnd_is_pc(opnd), "unexpected opnd");
    }
    if (TESTANY(EFLAGS_WRITE_ARITH, instr_get_eflags(&inst, DR_QUERY_INCLUDE_ALL))) {
        set_shadow_eflags(comb.eflags);
    }
    LOG(4, "shadow registers after instr:\n");
    DOLOG(4, { print_shadow_registers(); });
    instr_free(drcontext, &inst);
    /* call this last after freeing inst in case it does a synchronous flush */
    slow_path_xl8_sharing(&loc, instr_sz, memop, mc);
    DOLOG(5, { /* this pollutes the logfile, so it's a pain to have at 4 or lower */
        if (!options.single_arg_slowpath && pc == decode_pc/*else retpc not in tls3*/) {
            /* Test translation when have both args */
            /* we want the ultimate target, not whole_bb_spills_enabled()'s
             * SPILL_SLOT_5 intermediate target
             */
            byte *ret_pc = (byte *) get_own_tls_value(SPILL_SLOT_2);
            /* ensure event_restore_state() returns true */
            byte *xl8;
            cpt->self_translating = true;
            xl8 = dr_app_pc_from_cache_pc(ret_pc);
            cpt->self_translating = false;
            LOG(3, "translation test: cache="PFX", orig="PFX", xl8="PFX"\n",
                ret_pc, pc, xl8);
            ASSERT(xl8 == pc ||
                   IF_X86((options.repstr_to_loop &&
                           /* Depending on -no_fastpath we'll get here for the jecxz
                            * pointing at the loop, the loop, or the stringop.
                            */
                           (opc_is_stringop(opc) || opc == OP_loop) &&
                           /* For repstr_to_loop we changed pc */
                           (xl8 == loc_to_pc(&loc) ||
                            /* For repstr_to_loop OP_loop, ret_pc is the restore
                             * code after stringop and before OP_loop*, so we'll get
                             * post-xl8 pc.
                             */
                            xl8 == decode_next_pc(drcontext, loc_to_pc(&loc)))) ||)
                   /* ret_pc may be a global reg restore, and for -no_fastpath
                    * this will use the prior xl8 since there's no meta-xl8 and
                    * the real app instr is beyond ret_pc.
                    */
                   (instr_at_pc_is_restore(drcontext, ret_pc) &&
                    pc == decode_next_pc(drcontext, xl8)) ||
                   /* for native ret we changed pc */
                   (options.replace_malloc && opc == IF_X86_ELSE(OP_ret, OP_bx) &&
                    alloc_entering_replace_routine(xl8)),
                   "xl8 doesn't match");
        }
    });
    return true;
#endif /* !TOOL_DR_HEAPSTAT */
}
/* called from code cache */
static bool
slow_path(app_pc pc, app_pc decode_pc)
{
    void *drcontext = dr_get_current_drcontext();
    dr_mcontext_t mc; /* do not init whole thing: memset is expensive */
    bool res;
    mc.size = sizeof(mc);
    mc.flags = DR_MC_CONTROL|DR_MC_INTEGER; /* don't need xmm */
    dr_get_mcontext(drcontext, &mc);
    res = slow_path_with_mc(drcontext, pc, decode_pc, &mc);
#ifdef TOOL_DR_MEMORY
    DODEBUG({
        cls_drmem_t *cpt = (cls_drmem_t *) drmgr_get_cls_field(drcontext, cls_idx_drmem);
        /* Try to ensure that mem2fpmm_source doesn't "escape" */
        ASSERT(cpt->mem2fpmm_source == NULL ||
               (pc >= cpt->mem2fpmm_load_pc && pc <= cpt->mem2fpmm_pc),
               "mem2fpmm source escaped");
    });
#endif
    return res;
}
/* Returns whether a single pc can be used for app reporting and
 * decoding of the app instr (or, whether a separate decode pc can be
 * used b/c there's fixup code for the pc to report in the slowpath).
 * The OUT param is either an immed int opnd or an instr opnd that can be
 * used as an intpr opnd for decoding.
 */
static bool
instr_shared_slowpath_decode_pc(instr_t *inst, fastpath_info_t *mi,