From 393073a825536857d61209fe91569b1b4527c7e8 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Sat, 4 Nov 2023 21:41:27 -0400 Subject: [PATCH 1/2] i#6397: Add instr_is_opnd_store_source() (#6405) Adds a new IR API routine instr_is_opnd_store_source() which identifies whether a given source operand supplies the value to be stored into memory for a store instruction. Although generally DR avoids modeling dataflow, especially for complex SIMD shifts, this data identification is useful enough that we provide a helper function. However, we do not go as far as annotating the decoder operands, and instead deduce the answer based on conventions in our IR, which should suffice for now and even if not as clean as decoder markup it is better to have this helper in one place we could improve in the future instead of different users trying to figure this out. Adds some tests for each architecture. Fixes #6397 --- api/docs/release.dox | 1 + core/ir/aarch64/instr_create_api.h | 9 +-- core/ir/instr_api.h | 12 ++++ core/ir/instr_shared.c | 94 ++++++++++++++++++++++++++++++ suite/tests/api/drdecode_aarch64.c | 43 ++++++++++++++ suite/tests/api/drdecode_arm.c | 47 ++++++++++++++- suite/tests/api/drdecode_x86.c | 43 ++++++++++++++ 7 files changed, 244 insertions(+), 5 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index 6d1e8444c07..70894a86826 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -174,6 +174,7 @@ Further non-compatibility-affecting changes include: - Added -record_syscall to drmemtrace for recording syscall parameters. - Added opportunity to run multiple drcachesim analysis tools simultaneously. - Added support of loading separately-built analysis tools to drcachesim dynamically. + - Added instr_is_opnd_store_source(). **************************************************
diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index 7d50163d6d0..6dfe149ef52 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -2942,20 +2942,21 @@ * Creates an LDR immediate instruction. * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Rt The output register. - * \param Xn The input register or stack pointer + * \param Xn The input register or stack pointer. * \param Rn The input memory disposition. * \param imm Immediate int of the input register offset */ #define INSTR_CREATE_ldr_imm(dc, Rt, Xn, Rn, imm) \ instr_create_2dst_3src(dc, OP_ldr, Rt, Xn, Rn, Xn, imm) +/* XXX: This should auto-extract the base reg and the immediate from the memop! */ /** * Creates a STR immediate instruction. * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Rt The output memory disposition. - * \param Xt The input register or stack pointer. - * \param Xn The output register - * \param imm Immediate int of the output register offset + * \param Xt The input register or stack pointer to store. + * \param Xn The output register which must be the address base register. + * \param imm Immediate int of the output register offset. */ #define INSTR_CREATE_str_imm(dc, Rt, Xt, Xn, imm) \ instr_create_2dst_3src(dc, OP_str, Rt, Xn, Xt, Xn, imm) diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h index 632acf2bb05..9a5ff73e7b4 100644 --- a/core/ir/instr_api.h +++ b/core/ir/instr_api.h @@ -864,6 +864,18 @@ DR_API void instr_set_target(instr_t *cti_instr, opnd_t target); +DR_API +/** + * If \p store_instr is not a store (instr_writes_memory() returns false), returns + * false. If \p store_instr is a store (instr_writes_memory() returns true), returns + * whether its source operand with index \p source_ordinal (as passed to + * instr_get_src()) is a source for the value that is stored. (If not, it may be an + * address register that is updated for pre-index or post-index writeback forms, or + * some other source that does not directly affect the value written to memory.) + */ +bool +instr_is_opnd_store_source(instr_t *store_instr, int source_ordinal); + INSTR_INLINE_INTERNALLY DR_API /** Returns true iff \p instr's operands are up to date. */ diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index c36f369c843..00540b4d31f 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -751,6 +751,100 @@ instr_set_target(instr_t *instr, opnd_t target) instr_set_operands_valid(instr, true); } +bool +instr_is_opnd_store_source(instr_t *store_instr, int source_ordinal) +{ + /* We generally do not model data movement in our IR, especially for complex + * SIMD swaps and shuffles: we just have flat lists of sources and dests. + * Taint trackers or other dataflow tools are expected to dispatch on opcode + * for cases where some sources map to a subset of the dests. However, + * identifying which sources flow into the (typically unique) memory + * destination is a key piece of information we do try to provide. We do + * not yet go as far as labeling this in the IR decoding codec/table info + * sources and thus rely on operand ordering conventions. If these heuristics + * prove too fragile we can move toward more direct support, but by + * providing an official helper function here now we at least get all users + * using the same code we can update later. + */ + if (store_instr == NULL || source_ordinal < 0 || + source_ordinal >= instr_num_srcs(store_instr)) + return false; + /* First, find the (first) store. */ + opnd_t memop = opnd_create_null(); + for (int i = 0; i < instr_num_dsts(store_instr); i++) { + memop = instr_get_dst(store_instr, i); + if (opnd_is_memory_reference(memop)) + break; + } + if (opnd_is_null(memop)) + return false; +#ifdef X86 + /* First, handle exceptional cases. */ + if (instr_get_opcode(store_instr) == OP_cmpxchg8b) { + /* Our table has xcx:xbx in later slots. */ + return source_ordinal == 3 || source_ordinal == 4; + } +#endif + /* A convention in our IR is to always list the primary data source as + * the first source operand. + */ + if (source_ordinal == 0) + return true; +#ifdef X86 + /* XXX: Are there other atomics on x86 or other arches we need to list here too? */ + if (instr_get_opcode(store_instr) == OP_cmpxchg) + return false; +#endif + /* CISC ALU ops have the target listed as a (non-first) source memop. */ + if (opnd_same(memop, instr_get_src(store_instr, source_ordinal))) + return true; + /* Now we need to distinuish additional register data sources from an + * address register update. There can be many additional data sources + * (x86 OP_pusha, aarch32 register lists). But an address register update + * will always be listed as both a source and a dest, at the end, even if this + * duplicates a data source. E.g.: + * + * e92dffff stmdb %r0 %r1 %r2 %r3 %r4 %r5 %r6 %r7 %r8 %r9 %r10 %r11 %r12 %sp %lr + * %pc %sp -> -0x40(%sp)[64byte] %sp + * + * There can be an immediate before (aarch32) or after (aarch64) the source for + * cases where the adjustment is not fixed: e.g., AArch64 INSTR_CREATE_str_imm(). + * + * Our OP_pusha entry violates the address register convention below by listing + * xsp first but not duplicating it at the end: but that means we need take no + * further action here as all sources are data sources. + */ + if (instr_num_srcs(store_instr) > 1 && instr_num_dsts(store_instr) > 1) { + /* Is the last dest an address register? */ + opnd_t last_dst = instr_get_dst(store_instr, instr_num_dsts(store_instr) - 1); + if (opnd_is_reg(last_dst) && + /* See whether the memory operation uses the last dest reg. */ + opnd_uses_reg(memop, opnd_get_reg(last_dst))) { + /* Is there an identical source operand register, at the end or prior to + * an immed that is at the end? + */ + opnd_t last_src = instr_get_src(store_instr, instr_num_srcs(store_instr) - 1); + /* Move prior to an immed if there are enough srcs for reg, reg, imm. */ + bool has_immed = false; + if (opnd_is_immed_int(last_src) && instr_num_srcs(store_instr) > 2) { + has_immed = true; + last_src = instr_get_src(store_instr, instr_num_srcs(store_instr) - 2); + } else if (instr_num_srcs(store_instr) > 2 && + opnd_is_immed_int( + instr_get_src(store_instr, instr_num_srcs(store_instr) - 2))) { + has_immed = true; + } + if (opnd_same(last_dst, last_src)) { + /* Fits pattern of address register. */ + if (source_ordinal == instr_num_srcs(store_instr) - 1 || + (has_immed && source_ordinal == instr_num_srcs(store_instr) - 2)) + return false; + } + } + } + return true; +} + instr_t * instr_set_prefix_flag(instr_t *instr, uint prefix) { diff --git a/suite/tests/api/drdecode_aarch64.c b/suite/tests/api/drdecode_aarch64.c index e8fc59a9c5e..58cc7f1339e 100644 --- a/suite/tests/api/drdecode_aarch64.c +++ b/suite/tests/api/drdecode_aarch64.c @@ -201,6 +201,47 @@ test_categories(void) ASSERT(cat == DR_INSTR_CATEGORY_UNCATEGORIZED); } +static void +test_store_source(void) +{ + instr_t *in = XINST_CREATE_store(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 42), + opnd_create_reg(DR_REG_R1)); + ASSERT(!instr_is_opnd_store_source(in, -1)); /* Out of bounds. */ + ASSERT(instr_is_opnd_store_source(in, 0)); /* r1. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* Out of bounds. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_str_imm(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 16), + opnd_create_reg(DR_REG_R1), opnd_create_reg(DR_REG_R0), + OPND_CREATE_INT(16)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r1. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* r0. */ + ASSERT(!instr_is_opnd_store_source(in, 2)); /* immed. */ + instr_destroy(GD, in); + + /* Test data==addr reg. */ + in = INSTR_CREATE_str_imm(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 16), + opnd_create_reg(DR_REG_R0), opnd_create_reg(DR_REG_R0), + OPND_CREATE_INT(16)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r0. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* r0 address. */ + ASSERT(!instr_is_opnd_store_source(in, 2)); /* immed. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_stp(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 0), + opnd_create_reg(DR_REG_R1), opnd_create_reg(DR_REG_R2)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r1. */ + ASSERT(instr_is_opnd_store_source(in, 1)); /* r2. */ + instr_destroy(GD, in); + + /* Test data==addr reg. */ + in = INSTR_CREATE_stp(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 0), + opnd_create_reg(DR_REG_R0), opnd_create_reg(DR_REG_R1)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r0. */ + ASSERT(instr_is_opnd_store_source(in, 1)); /* r1. */ + instr_destroy(GD, in); +} + int main() { @@ -212,6 +253,8 @@ main() test_categories(); + test_store_source(); + print("done\n"); return 0; diff --git a/suite/tests/api/drdecode_arm.c b/suite/tests/api/drdecode_arm.c index fcd3204d69b..961d421fe57 100644 --- a/suite/tests/api/drdecode_arm.c +++ b/suite/tests/api/drdecode_arm.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015 Google, Inc. All rights reserved. + * Copyright (c) 2015-2023 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -110,6 +110,49 @@ test_noalloc(void) */ } +static void +test_store_source(void) +{ + instr_t *in = XINST_CREATE_store(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 42), + opnd_create_reg(DR_REG_R1)); + ASSERT(!instr_is_opnd_store_source(in, -1)); /* Out of bounds. */ + ASSERT(instr_is_opnd_store_source(in, 0)); /* r1. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* Out of bounds. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_push(GD, opnd_create_reg(DR_REG_R1)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r1. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* immed. */ + ASSERT(!instr_is_opnd_store_source(in, 2)); /* sp. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_str_wbimm(GD, OPND_CREATE_MEMPTR(DR_REG_R0, 42), + opnd_create_reg(DR_REG_R0), OPND_CREATE_INT(16)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r0. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* immed. */ + ASSERT(!instr_is_opnd_store_source(in, 2)); /* r0 address. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_stmdb_wb(GD, OPND_CREATE_MEMLIST(DR_REG_R3), 10, + opnd_create_reg(DR_REG_R0), opnd_create_reg(DR_REG_R1), + opnd_create_reg(DR_REG_R2), opnd_create_reg(DR_REG_R3), + opnd_create_reg(DR_REG_R4), opnd_create_reg(DR_REG_R5), + opnd_create_reg(DR_REG_R6), opnd_create_reg(DR_REG_R7), + opnd_create_reg(DR_REG_R8), opnd_create_reg(DR_REG_R9)); + ASSERT(instr_is_opnd_store_source(in, 0)); /* r0. */ + ASSERT(instr_is_opnd_store_source(in, 1)); /* r1. */ + ASSERT(instr_is_opnd_store_source(in, 2)); /* r2. */ + ASSERT(instr_is_opnd_store_source(in, 3)); /* r3. */ + ASSERT(instr_is_opnd_store_source(in, 4)); /* r4. */ + ASSERT(instr_is_opnd_store_source(in, 5)); /* r5. */ + ASSERT(instr_is_opnd_store_source(in, 6)); /* r6. */ + ASSERT(instr_is_opnd_store_source(in, 7)); /* r7. */ + ASSERT(instr_is_opnd_store_source(in, 8)); /* r8. */ + ASSERT(instr_is_opnd_store_source(in, 9)); /* r9. */ + ASSERT(!instr_is_opnd_store_source(in, 10)); /* r3 addr. */ + instr_destroy(GD, in); +} + int main() { @@ -117,6 +160,8 @@ main() test_noalloc(); + test_store_source(); + printf("done\n"); return 0; diff --git a/suite/tests/api/drdecode_x86.c b/suite/tests/api/drdecode_x86.c index f0565956285..fc53a4e3e31 100644 --- a/suite/tests/api/drdecode_x86.c +++ b/suite/tests/api/drdecode_x86.c @@ -185,6 +185,47 @@ test_categories(void) CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_BRANCH); } +static void +test_store_source(void) +{ + instr_t *in = XINST_CREATE_store(GD, OPND_CREATE_MEMPTR(DR_REG_XAX, 42), + opnd_create_reg(DR_REG_XDX)); + ASSERT(!instr_is_opnd_store_source(in, -1)); /* Out of bounds. */ + ASSERT(instr_is_opnd_store_source(in, 0)); /* xdx. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* Out of bounds. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_add(GD, OPND_CREATE_MEMPTR(DR_REG_XAX, 42), + opnd_create_reg(DR_REG_XDX)); + ASSERT(!instr_is_opnd_store_source(in, -1)); /* Out of bounds. */ + ASSERT(instr_is_opnd_store_source(in, 0)); /* xdx. */ + ASSERT(instr_is_opnd_store_source(in, 1)); /* memop. */ + ASSERT(!instr_is_opnd_store_source(in, 2)); /* Out of bounds. */ + instr_destroy(GD, in); + + in = INSTR_CREATE_cmpxchg8b( + GD, opnd_create_base_disp(DR_REG_XAX, DR_REG_NULL, 0, 42, OPSZ_8)); + ASSERT(!instr_is_opnd_store_source(in, 0)); /* Memop. */ + ASSERT(!instr_is_opnd_store_source(in, 1)); /* xax. */ + ASSERT(!instr_is_opnd_store_source(in, 2)); /* xdx. */ + ASSERT(instr_is_opnd_store_source(in, 3)); /* xcx. */ + ASSERT(instr_is_opnd_store_source(in, 4)); /* xbx. */ + instr_destroy(GD, in); + +#ifndef X64 + in = INSTR_CREATE_pusha(GD); + ASSERT(instr_is_opnd_store_source(in, 0)); /* xsp. */ + ASSERT(instr_is_opnd_store_source(in, 1)); /* xax. */ + ASSERT(instr_is_opnd_store_source(in, 2)); /* xbx. */ + ASSERT(instr_is_opnd_store_source(in, 3)); /* xcx. */ + ASSERT(instr_is_opnd_store_source(in, 4)); /* xdx. */ + ASSERT(instr_is_opnd_store_source(in, 5)); /* xbp. */ + ASSERT(instr_is_opnd_store_source(in, 6)); /* xsi. */ + ASSERT(instr_is_opnd_store_source(in, 7)); /* xdi. */ + instr_destroy(GD, in); +#endif +} + int main() { @@ -198,6 +239,8 @@ main() test_categories(); + test_store_source(); + printf("done\n"); return 0; From 62c002d0765ae1eb6836b8b3e360ca291d636568 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Mon, 6 Nov 2023 15:05:59 -0500 Subject: [PATCH 2/2] Fix incorrect doxygen xref (#6410) Fixes a doxygen ref to "dr_abort_ex" to the correct "dr_abort_with_code". --- core/lib/dr_tools.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/lib/dr_tools.h b/core/lib/dr_tools.h index efbb1c636c8..4b93633243e 100644 --- a/core/lib/dr_tools.h +++ b/core/lib/dr_tools.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2022, Inc. All rights reserved. + * Copyright (c) 2010-2023, Inc. All rights reserved. * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -295,7 +295,7 @@ DR_API * \note Calling this from \p dr_client_main or from the primary thread's * initialization event is not guaranteed to always work, as DR may * invoke a thread exit event where a thread init event was never - * called. We recommend using dr_abort_ex() or waiting for full + * called. We recommend using dr_abort_with_code() or waiting for full * initialization prior to use of this routine. */ void