i#1795 drreg: cleanup: remove -single_arg_slowpath #2095

Merged: 1 commit, Apr 1, 2018
7 changes: 5 additions & 2 deletions drmemory/client_per_thread.h
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2010-2014 Google, Inc. All rights reserved.
* Copyright (c) 2010-2017 Google, Inc. All rights reserved.
* Copyright (c) 2007-2010 VMware, Inc. All rights reserved.
* **********************************************************/

@@ -49,7 +49,10 @@ typedef struct _cls_drmem_t {
# endif
#endif /* TOOL_DR_MEMORY */

/* for jmp-to-slowpath optimization where we xl8 to get app pc (PR 494769) */
/* Was mostly used for the jmp-to-slowpath optimization where we xl8
 * to get the app pc (PR 494769), which has since been removed, but
 * it is also used for logging.
 */
bool self_translating;

/* for i#471 and i#1453: mem2mem via fp or mm reg heuristic */
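For context on the flag that survives this cleanup: self_translating is only a guard around self-initiated pc translation. A minimal sketch of the remaining logging-side use, reusing the same DR API calls as the translate_cache_pc() routine deleted from instru.c below (log_translate_pc is a hypothetical name, not part of this PR):

static app_pc
log_translate_pc(byte *cache_pc)
{
    void *drcontext = dr_get_current_drcontext();
    cls_drmem_t *cpt = (cls_drmem_t *) drmgr_get_cls_field(drcontext, cls_idx_drmem);
    app_pc res;
    /* Tell event_restore_state() (see the spill.c hunk at the end) that
     * only the pc needs translating, so it can return true without
     * restoring register values.
     */
    cpt->self_translating = true;
    res = dr_app_pc_from_cache_pc(cache_pc);
    cpt->self_translating = false;
    return res;
}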
2 changes: 0 additions & 2 deletions drmemory/drmemory.c
@@ -242,8 +242,6 @@ dump_statistics(void)
pop_slowpath+pop_fastpath+pop4_fastpath);
dr_fprintf(f_global, "slow instead of fast: %8u, b/c unaligned: %8u, 8@border: %8u\n",
slow_instead_of_fast, slowpath_unaligned, slowpath_8_at_border);
dr_fprintf(f_global, "app instrs: fastpath: %7u, no dup: %7u, xl8: %7u\n",
app_instrs_fastpath, app_instrs_no_dup, xl8_app_for_slowpath);
dr_fprintf(f_global, "addr exceptions: header: %7u, tls: %5u, alloca: %5u\n",
heap_header_exception, tls_exception, alloca_exception);
dr_fprintf(f_global, "more addr exceptions: ld DR: %5u, cpp DR: %5u\n",
12 changes: 1 addition & 11 deletions drmemory/fastpath.c
@@ -53,17 +53,7 @@ slow_path_xl8_sharing(app_loc_t *loc, size_t inst_sz, opnd_t memop, dr_mcontext_
app_pc pc;
bool translated = true;
ASSERT(loc != NULL && loc->type == APP_LOC_PC, "invalid param");
if (options.single_arg_slowpath) {
/* We don't want to pay the xl8 cost every time so we have
* an additional entry for the cache pc and we only xl8 when
* that crosses the threshold. This may be superior anyway since
* app pc can be duplicated in other bbs where it might behave
* differently (though seems unlikely).
*/
translated = loc->u.addr.valid;
pc = loc->u.addr.pc;
} else
pc = loc_to_pc(loc);
pc = loc_to_pc(loc);
xl8_sharing_cnt = (uint)(ptr_uint_t) hashtable_lookup(&xl8_sharing_table, pc);
if (xl8_sharing_cnt > 0) {
STATS_INC(xl8_shared_slowpath_count);
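The simplification above means every slowpath hit now pays the loc_to_pc() translation up front, before the sharing-table lookup. A hedged sketch of that counting pattern, using the drcontainers hashtable API as in the hunk (hit_sharing_threshold and the threshold value are hypothetical illustrations):

static bool
hit_sharing_threshold(app_loc_t *loc)
{
    /* The pc is always a translated app pc now that -single_arg_slowpath
     * and its deferred-xl8 cache-pc table entries are gone.
     */
    app_pc pc = loc_to_pc(loc);
    uint cnt = (uint)(ptr_uint_t) hashtable_lookup(&xl8_sharing_table, pc);
    hashtable_add_replace(&xl8_sharing_table, pc, (void *)(ptr_uint_t)(cnt + 1));
    return cnt + 1 > 10;   /* hypothetical threshold */
}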
2 changes: 0 additions & 2 deletions drmemory/fastpath.h
@@ -129,8 +129,6 @@ typedef struct _fastpath_info_t {
reg_id_t reg3_8;
/* is this instr using shared xl8? */
bool use_shared;
/* for jmp-to-slowpath optimization (PR 494769) */
instr_t *appclone;
instr_t *slow_store_retaddr;
instr_t *slow_store_retaddr2; /* if takes 2 instrs */
opnd_t slow_store_dst;
95 changes: 1 addition & 94 deletions drmemory/instru.c
@@ -148,12 +148,6 @@ instrument_fragment_delete(void *drcontext/*may be NULL*/, void *tag)
* fail to be optimized b/c it will use the old code's history: so a perf
* failure, not a correctness failure.
*/
/* XXX i#551: -single_arg_slowpath adds a second xl8_sharing_table entry with
* cache pc for each app pc entry which we are not deleting yet. May need a
* table to map the two. Xref DRi#409: while there's no good solution from
* the DR side for app pc flushing, perhaps some event on re-using cache pcs
* could work but seems too specialized.
*/
/* i#768: We used to invalidate entries from ignore_unaddr_table here,
* but that ends up thrashing the code cache. Instead we remove stale
* entries in the new bb event if the alloca pattern no longer matches.
@@ -1322,60 +1316,6 @@ instru_event_bb_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *ins
if (INSTRUMENT_MEMREFS())
fastpath_pre_app_instr(drcontext, bb, inst, bi, &mi);

if (mi.appclone != NULL) {
instr_t *nxt = instr_get_next(mi.appclone);
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
while (nxt != NULL &&
(instr_is_label(nxt) || instr_is_spill(nxt) || instr_is_restore(nxt)))
nxt = instr_get_next(nxt);
ASSERT(nxt != NULL, "app clone error");
DOLOG(3, {
LOG(3, "comparing: ");
instr_disassemble(drcontext, mi.appclone, LOGFILE_GET(drcontext));
LOG(3, "\n");
LOG(3, "with: ");
instr_disassemble(drcontext, nxt, LOGFILE_GET(drcontext));
LOG(3, "\n");
});
STATS_INC(app_instrs_fastpath);
/* only destroy if app instr won't be mangled */
if (instr_same(mi.appclone, nxt) &&
!instr_is_cti(nxt) &&
/* FIXME PR 494769: -single_arg_slowpath cannot be on by default
* until b/c we can't predict whether an instr will be mangled
* for selfmod! Also, today we're not looking for mangling of
* instr_has_rel_addr_reference(). The option is off by default
* until that's addressed by implementing i#156/PR 306163 and
* adding post-mangling bb and trace events.
*/
!instr_is_syscall(nxt) &&
!instr_is_interrupt(nxt)) {
ASSERT(mi.slow_store_retaddr != NULL, "slowpath opt error");
/* point at the jmp so slow_path() knows to return right afterward */
instru_insert_mov_pc(drcontext, bb, mi.slow_store_retaddr,
mi.slow_store_dst, opnd_create_instr(mi.slow_jmp));
/* we've replaced the original so remove it */
instrlist_remove(bb, mi.slow_store_retaddr);
instr_destroy(drcontext, mi.slow_store_retaddr);
mi.slow_store_retaddr = NULL;
if (mi.slow_store_retaddr2 != NULL) {
instrlist_remove(bb, mi.slow_store_retaddr2);
instr_destroy(drcontext, mi.slow_store_retaddr2);
mi.slow_store_retaddr2 = NULL;
}
instrlist_remove(bb, mi.appclone);
instr_destroy(drcontext, mi.appclone);
mi.appclone = NULL;
STATS_INC(app_instrs_no_dup);
} else {
DOLOG(3, {
LOG(3, "need dup for: ");
instr_disassemble(drcontext, mi.appclone, LOGFILE_GET(drcontext));
LOG(3, "\n");
});
}
}

instru_event_bb_insert_done:
if (bi->first_instr && instr_is_app(inst))
bi->first_instr = false;
@@ -1429,44 +1369,11 @@ instru_event_bb_instru2instru(void *drcontext, void *tag, instrlist_t *bb,
* LOCATION SHARED CODE
*/

#ifdef TOOL_DR_MEMORY
/* for jmp-to-slowpath optimization where we xl8 to get app pc (PR 494769) */
static app_pc
translate_cache_pc(byte *pc_to_xl8)
{
app_pc res;
void *drcontext = dr_get_current_drcontext();
cls_drmem_t *cpt = (cls_drmem_t *) drmgr_get_cls_field(drcontext, cls_idx_drmem);
ASSERT(cpt != NULL, "pt shouldn't be null");
ASSERT(pc_to_xl8 != NULL, "invalid param");
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
/* ensure event_restore_state() returns true */
cpt->self_translating = true;
res = dr_app_pc_from_cache_pc(pc_to_xl8);
cpt->self_translating = false;
ASSERT(res != NULL, "failure to determine app pc on slowpath");
STATS_INC(xl8_app_for_slowpath);
LOG(3, "translated "PFX" to "PFX" for slowpath\n", pc_to_xl8, res);
return res;
}
#endif

app_pc
loc_to_pc(app_loc_t *loc)
{
ASSERT(loc != NULL && loc->type == APP_LOC_PC, "invalid param");
if (!loc->u.addr.valid) {
#ifdef TOOL_DR_MEMORY
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
/* pc field holds cache pc that must be translated */
ASSERT(dr_memory_is_dr_internal(loc->u.addr.pc), "invalid untranslated pc");
loc->u.addr.pc = translate_cache_pc(loc->u.addr.pc);
ASSERT(loc->u.addr.pc != NULL, "translation failed");
loc->u.addr.valid = true;
#else
ASSERT(false, "NYI");
#endif
}
ASSERT(loc->u.addr.valid, "-single_arg_slowpath was removed so should be valid");
return loc->u.addr.pc;
}

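With translate_cache_pc() gone, loc_to_pc() is a plain accessor over an already-valid app pc. A short usage sketch; pc_to_loc() is assumed here to be the existing initializer elsewhere in the tree, so treat that name as an assumption:

app_loc_t loc;
pc_to_loc(&loc, instr_get_app_pc(inst));   /* stores a translated app pc */
app_pc pc = loc_to_pc(&loc);               /* now just asserts validity and reads it back */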
4 changes: 0 additions & 4 deletions drmemory/optionsx.h
@@ -858,10 +858,6 @@ OPTION_CLIENT_STRING(internal, libc_addrs, "",
OPTION_CLIENT_BOOL(internal, check_push, true,
"Check that pushes are writing to unaddressable memory",
"Check that pushes are writing to unaddressable memory")
OPTION_CLIENT_BOOL(internal, single_arg_slowpath, false,
/* XXX: PR 494769: this feature is not yet finished */
"Use single arg for jmp-to-slowpath and derive second",
"Use single arg for jmp-to-slowpath and derive second")
OPTION_CLIENT_BOOL(internal, repstr_to_loop, true,
"Add fastpath for rep string instrs by converting to normal loop",
"Add fastpath for rep string instrs by converting to normal loop")
105 changes: 13 additions & 92 deletions drmemory/slowpath.c
@@ -121,9 +121,6 @@ uint xl8_shared_slowpath_instrs;
uint xl8_shared_slowpath_count;
uint slowpath_unaligned;
uint slowpath_8_at_border;
uint app_instrs_fastpath;
uint app_instrs_no_dup;
uint xl8_app_for_slowpath;
uint num_bbs;
#endif

@@ -566,41 +563,7 @@ slow_path_with_mc(void *drcontext, app_pc pc, app_pc decode_pc, dr_mcontext_t *m
* but we don't rely on them here.
*/

/* for jmp-to-slowpath optimization where we xl8 to get app pc (PR 494769)
* we always pass NULL for decode_pc
*/
if (decode_pc == NULL) {
/* not using safe_read since in cache */
byte *ret_pc = (byte *) get_own_tls_value(SPILL_SLOT_2);
ASSERT(pc == NULL, "invalid params");
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
/* If the ret pc is a jmp, we know to walk forward, bypassing
* spills, to find the app instr (we assume app jmp never
* needs slowpath). If using a cloned app instr, then ret pc
* points directly there. Since we want to skip the clone and
* the jmp, we always skip the instr at ret pc when returning.
*/
pc = decode_next_pc(drcontext, ret_pc);
ASSERT(pc != NULL, "invalid stored app instr");
set_own_tls_value(SPILL_SLOT_2, (reg_t) pc);
if (*ret_pc == 0xe9) {
/* walk forward to find the app pc */
instr_init(drcontext, &inst);
do {
instr_reset(drcontext, &inst);
decode_pc = pc;
pc = decode(drcontext, decode_pc, &inst);
ASSERT(pc != NULL, "invalid app instr copy");
} while (instr_is_spill(&inst) || instr_is_restore(&inst));
instr_reset(drcontext, &inst);
} else
decode_pc = ret_pc;
/* if we want the app addr later, we'll have to translate to get it */
loc.u.addr.valid = false;
loc.u.addr.pc = decode_pc;
pc = NULL;
} else
ASSERT(!options.single_arg_slowpath, "single_arg_slowpath error");
ASSERT(decode_pc != NULL, "single_arg_slowpath removed");

#ifdef TOOL_DR_MEMORY
if (decode_pc != NULL) {
@@ -934,7 +897,7 @@ slow_path_with_mc(void *drcontext, app_pc pc, app_pc decode_pc, dr_mcontext_t *m
slow_path_xl8_sharing(&loc, instr_sz, memop, mc);

DOLOG(5, { /* this pollutes the logfile, so it's a pain to have at 4 or lower */
if (!options.single_arg_slowpath && pc == decode_pc/*else retpc not in tls3*/) {
if (pc == decode_pc/*else retpc not in tls3*/) {
/* Test translation when have both args */
/* we want the ultimate target, not whole_bb_spills_enabled()'s
* SPILL_SLOT_5 intermediate target
@@ -1162,37 +1125,17 @@ instrument_slowpath(void *drcontext, instrlist_t *bb, instr_t *inst,
shared_slowpath_entry_global[r1][r2][r3]:
shared_slowpath_entry_local[r1][r2][r3][ef]);
ASSERT(tgt != NULL, "targeting un-generated slowpath");
if (options.single_arg_slowpath) {
/* for jmp-to-slowpath optimization: we point at app instr, or a
* clone of it, for pc to decode from (PR 494769), as the
* retaddr, and thus do not need a second parameter.
*/
mi->appclone = instr_clone(drcontext, inst);
mi->slow_store_dst = (r2 == SPILL_REG_NONE) ?
spill_slot_opnd(drcontext, SPILL_SLOT_2) : opnd_create_reg(s2->reg);
instrlist_insert_mov_instr_addr(drcontext, mi->appclone,
NULL /* in code cache */,
mi->slow_store_dst,
bb, inst, &mi->slow_store_retaddr,
&mi->slow_store_retaddr2);
mi->slow_jmp = XINST_CREATE_jump(drcontext, opnd_create_pc(tgt));
PRE(bb, inst, mi->slow_jmp);
instr_set_meta(mi->appclone);
instr_set_translation(mi->appclone, NULL);
PRE(bb, inst, mi->appclone);
} else {
instru_insert_mov_pc(drcontext, bb, inst,
(r1 == SPILL_REG_NONE) ?
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM) :
opnd_create_reg(s1->reg),
decode_pc_opnd);
instru_insert_mov_pc(drcontext, bb, inst,
(r2 == SPILL_REG_NONE) ?
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_RET) :
opnd_create_reg(s2->reg),
opnd_create_instr(appinst));
PRE(bb, inst, XINST_CREATE_jump(drcontext, opnd_create_pc(tgt)));
}
instru_insert_mov_pc(drcontext, bb, inst,
(r1 == SPILL_REG_NONE) ?
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM) :
opnd_create_reg(s1->reg),
decode_pc_opnd);
instru_insert_mov_pc(drcontext, bb, inst,
(r2 == SPILL_REG_NONE) ?
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_RET) :
opnd_create_reg(s2->reg),
opnd_create_instr(appinst));
PRE(bb, inst, XINST_CREATE_jump(drcontext, opnd_create_pc(tgt)));
}
PRE(bb, inst, appinst);
/* If we entered the slowpath, we've clobbered the reg holding the address to
@@ -1348,28 +1291,6 @@ generate_shared_slowpath(void *drcontext, instrlist_t *ilist, byte *pc)
}
shared_slowpath_save_retaddr(drcontext, ilist, r2);
shared_slowpath_restore(drcontext, ilist, r2, SPILL_SLOT_2);
if (options.single_arg_slowpath) {
/* for jmp-to-slowpath optimization we don't have 2nd
* param, so pass 0 (PR 494769)
*/
if (r1 >= SPILL_REG_EAX && r1 <= SPILL_REG_EBX) {
PRE(ilist, NULL, INSTR_CREATE_mov_imm
(drcontext,
opnd_create_reg(DR_REG_XAX +
(r1 - SPILL_REG_EAX)),
OPND_CREATE_INT32(0)));
} else if (r1 >= SPILL_REG_EAX_DEAD && r1 <= SPILL_REG_EBX_DEAD) {
PRE(ilist, NULL, INSTR_CREATE_mov_imm
(drcontext,
opnd_create_reg(DR_REG_XAX +
(r1 - SPILL_REG_EAX_DEAD)),
OPND_CREATE_INT32(0)));
} else {
PRE(ilist, NULL, INSTR_CREATE_mov_st
(drcontext, spill_slot_opnd(drcontext, SPILL_SLOT_1),
OPND_CREATE_INT32(0)));
}
}
shared_slowpath_save_param(drcontext, ilist, r1);
shared_slowpath_restore(drcontext, ilist, r1, SPILL_SLOT_1);
PRE(ilist, NULL,
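Net effect of the slowpath.c changes: the shared slowpath is now always entered with two explicit arguments rather than one argument plus a decode pc derived from the return address. A condensed sketch of that convention, lifted from the hunk above with the slot-vs-register selection elided:

/* arg 1: pc to decode the app instr from */
instru_insert_mov_pc(drcontext, bb, inst,
                     spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM),
                     decode_pc_opnd);
/* arg 2: where slow_path() should resume execution afterward */
instru_insert_mov_pc(drcontext, bb, inst,
                     spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_RET),
                     opnd_create_instr(appinst));
PRE(bb, inst, XINST_CREATE_jump(drcontext, opnd_create_pc(tgt)));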
6 changes: 1 addition & 5 deletions drmemory/slowpath.h
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2010-2016 Google, Inc. All rights reserved.
* Copyright (c) 2010-2017 Google, Inc. All rights reserved.
* Copyright (c) 2008-2010 VMware, Inc. All rights reserved.
* **********************************************************/

@@ -41,7 +41,6 @@

/* we only need a little over 2 pages for whole_bb_spills_enabled(): could get
* onto 2 pages by not emitting SPILL_REG_NONE.
* -no_single_arg_slowpath needs only 10 pages.
*/
#ifdef X64
/* linux needs 14 pages, windows needs 16 pages */
@@ -153,9 +152,6 @@ extern uint slowpath_unaligned;
extern uint slowpath_8_at_border;
extern uint alloc_stack_count;
extern uint delayed_free_bytes;
extern uint app_instrs_fastpath;
extern uint app_instrs_no_dup;
extern uint xl8_app_for_slowpath;
extern uint num_bbs;

# ifdef X86
3 changes: 1 addition & 2 deletions drmemory/spill.c
@@ -408,8 +408,7 @@ event_restore_state(void *drcontext, bool restore_memory, dr_restore_state_info_

/* Are we asking DR to translate just pc? Then return true and ignore regs */
if (cpt->self_translating) {
ASSERT(options.single_arg_slowpath || options.verbose >= 3,
"only used for single_arg_slowpath or -verbose 3+");
ASSERT(options.verbose >= 3, "only used for -verbose 3+");
return true;
}
#endif
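For completeness, the surviving self_translating guard in context: a simplified sketch of event_restore_state() based on the hunk above, where the elided body stands in for the tool's normal spill restoration:

static bool
event_restore_state(void *drcontext, bool restore_memory,
                    dr_restore_state_info_t *info)
{
    cls_drmem_t *cpt = (cls_drmem_t *) drmgr_get_cls_field(drcontext, cls_idx_drmem);
    /* A self-initiated pc translation (after this PR, only the
     * -verbose >= 3 logging path) needs no register values, so tell DR
     * the state is fine as-is.
     */
    if (cpt->self_translating) {
        ASSERT(options.verbose >= 3, "only used for -verbose 3+");
        return true;
    }
    /* ... restore spilled registers and shadow state ... */
    return true;
}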