Skip to content

Commit

Permalink
i#1982 bbbuf.c: Fixes incorrect wrapping
Browse files Browse the repository at this point in the history
Fixes the incorrect wrapping of the buffer in bbbuf.c. Previously the ARM
path used uqadd16, which saturates instead of wrapping, so the buffer
never wrapped. We now use drx_buf, whose fast circular buffer is optimized
for 64KB buffers (low 16 bits of the pointer) and produces the same instrumentation.

Also optimizes drx_buf so that its signal handlers fail fast when only the
optimized (fast) circular buffer has been created.

Fixes #1982

Review-URL: https://codereview.appspot.com/317080043
  • Loading branch information
toshipiazza committed Jan 11, 2017
1 parent 59721da commit 2380ed9
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 127 deletions.
2 changes: 1 addition & 1 deletion api/samples/CMakeLists.txt
Expand Up @@ -204,7 +204,7 @@ configure_DynamoRIO_global(OFF ON)
# Use ;-separated lists for source files and extensions.

if (NOT AARCH64) # FIXME i#1569: port to AArch64
add_sample_client(bbbuf "bbbuf.c" "drmgr;drreg")
add_sample_client(bbbuf "bbbuf.c" "drmgr;drreg;drx")
endif ()
add_sample_client(bbsize "bbsize.c" "drmgr")
add_sample_client(empty "empty.c" "")
Expand Down
153 changes: 28 additions & 125 deletions api/samples/bbbuf.c
Expand Up @@ -41,48 +41,31 @@
* - store the starting pc of the basic block into the buffer,
* - update the pointer by incrementing just the low 16 bits of the pointer
* so we will fill the buffer in a cyclical way.
* This is all done via the fast circular buffer code provided by the drx_buf
* extension.
* This sample can be used for hot path profiling or debugging with execution
* history.
*/

#include "dr_api.h"
#include "drmgr.h"
#include "drreg.h"
#include "drx.h"
#include <string.h>

#define MINSERT instrlist_meta_preinsert

#define TESTALL(mask, var) (((mask) & (var)) == (mask))
#define TESTANY(mask, var) (((mask) & (var)) != 0)
#define ALIGN_FORWARD(x, alignment) \
((((ptr_uint_t)x) + ((alignment)-1)) & (~((alignment)-1)))

#define BUF_64K_BYTE (1 << 16)
/* We make TLS_BUF_SIZE to be 128KB so we can have a 64KB buffer
* with 64KB aligned starting address.
/* drx_buf makes our work easy as it already has first-class support for the
* fast circular buffer.
*/
#define TLS_BUF_SIZE (BUF_64K_BYTE * 2)
static reg_id_t tls_seg;
static uint tls_offs;
static int tls_idx;

typedef struct _per_thread_t {
void *seg_base;
void *buf_base;
} per_thread_t;
static drx_buf_t *buf;

static dr_emit_flags_t
event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst,
bool for_trace, bool translating, void *user_data)
{
app_pc pc = dr_fragment_app_pc(tag);
reg_id_t reg;
#ifdef ARM
/* We need a 2nd scratch reg for several operations */
reg_id_t reg2;
#else
bool dead;
#endif
/* We need a 2nd scratch reg for several operations on ARM only */
reg_id_t reg2 = DR_REG_NULL;

/* We do all our work at the start of the block prior to the first instr */
if (!drmgr_is_first_instr(drcontext, inst))
Expand All @@ -95,73 +78,31 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
}

#ifdef ARM
/* We need a second register here, because the drx_buf routines need a scratch reg
* for ARM.
*/
if (drreg_reserve_register(drcontext, bb, inst, NULL, &reg2) != DRREG_SUCCESS) {
DR_ASSERT(false); /* cannot recover */
return DR_EMIT_DEFAULT;
}
#endif

/* load buffer pointer from TLS field */
dr_insert_read_raw_tls(drcontext, bb, inst, tls_seg, tls_offs, reg);
drx_buf_insert_load_buf_ptr(drcontext, buf, bb, inst, reg);


/* store bb's start pc into the buffer */
#ifdef X86
/* XXX i#1694: split this sample into separate simple and optimized versions,
* with the simple using cross-platform instru and the optimized split into
* arm vs x86 versions.
*/
instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)pc,
OPND_CREATE_MEMPTR(reg, 0),
bb, inst, NULL, NULL);
#elif defined(ARM)
instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)pc,
opnd_create_reg(reg2),
bb, inst, NULL, NULL);
MINSERT(bb, inst, XINST_CREATE_store
(drcontext, OPND_CREATE_MEMPTR(reg, 0), opnd_create_reg(reg2)));
#endif
drx_buf_insert_buf_store(drcontext, buf, bb, inst, reg, reg2,
OPND_CREATE_INTPTR(pc), OPSZ_PTR, 0);

/* update the TLS buffer pointer by incrementing just the bottom 16 bits of
* the pointer
*/
#ifdef X86
if (drreg_are_aflags_dead(drcontext, inst, &dead) == DRREG_SUCCESS && dead) {
/* if aflags are dead, we use add directly */
MINSERT(bb, inst, INSTR_CREATE_add
(drcontext,
opnd_create_far_base_disp(tls_seg, DR_REG_NULL, DR_REG_NULL,
0, tls_offs, OPSZ_2),
OPND_CREATE_INT8(sizeof(app_pc))));
} else {
reg_id_t reg_16;
# ifdef X64
reg_16 = reg_32_to_16(reg_64_to_32(reg));
# else
reg_16 = reg_32_to_16(reg);
# endif
/* we use lea to avoid aflags save/restore */
MINSERT(bb, inst, INSTR_CREATE_lea
(drcontext,
opnd_create_reg(reg_16),
opnd_create_base_disp(reg, DR_REG_NULL, 0,
sizeof(app_pc), OPSZ_lea)));
dr_insert_write_raw_tls(drcontext, bb, inst, tls_seg, tls_offs, reg);
}
#elif defined(ARM)
/* We use this sequence:
* mov r1, #4
* uqadd16 r0, r0, r1
/* Internally this will update the TLS buffer pointer by incrementing just the bottom
* 16 bits of the pointer.
*/
MINSERT(bb, inst, INSTR_CREATE_mov
(drcontext, opnd_create_reg(reg2), OPND_CREATE_INT8(sizeof(app_pc))));
MINSERT(bb, inst, INSTR_CREATE_uqadd16
(drcontext, opnd_create_reg(reg), opnd_create_reg(reg),
opnd_create_reg(reg2)));
dr_insert_write_raw_tls(drcontext, bb, inst, tls_seg, tls_offs, reg);
#endif
drx_buf_insert_update_buf_ptr(drcontext, buf, bb, inst, reg, reg2, sizeof(app_pc));

if (drreg_unreserve_register(drcontext, bb, inst, reg) != DRREG_SUCCESS)
DR_ASSERT(false);

#ifdef ARM
if (drreg_unreserve_register(drcontext, bb, inst, reg2) != DRREG_SUCCESS)
DR_ASSERT(false);
Expand All @@ -173,53 +114,23 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
static void
event_thread_init(void *drcontext)
{
per_thread_t *data = dr_thread_alloc(drcontext, sizeof(*data));
byte *data;

DR_ASSERT(data != NULL);
drmgr_set_tls_field(drcontext, tls_idx, data);
/* Keep seg_base in a per-thread data structure so we can get the TLS
* slot and find where the pointer points to in the buffer.
* It is mainly for users using a debugger to get the execution history.
*/
data->seg_base = dr_get_dr_segment_base(tls_seg);
/* We allocate a 128KB buffer to make sure we have a 64KB buffer with
* 64KB-aligned starting address, so that we can fill the buffer
* cyclically by incrementing the bottom 16 bits of the pointer.
*/
data->buf_base = dr_raw_mem_alloc(TLS_BUF_SIZE,
DR_MEMPROT_READ | DR_MEMPROT_WRITE,
NULL);
DR_ASSERT(data->seg_base != NULL && data->buf_base != NULL);
memset(data->buf_base, 0, TLS_BUF_SIZE);
/* put the 64KB-aligned address into TLS slot as the pointer pointing
* to the 64KB cyclic buffer
*/
*(void **)((byte *)(data->seg_base) + tls_offs) = (void *)
ALIGN_FORWARD(data->buf_base, BUF_64K_BYTE);
}

static void
event_thread_exit(void *drcontext)
{
per_thread_t *data = drmgr_get_tls_field(drcontext, tls_idx);
dr_raw_mem_free(data->buf_base, TLS_BUF_SIZE);
dr_thread_free(drcontext, data, sizeof(*data));
data = drx_buf_get_buffer_ptr(drcontext, buf);
memset(data, 0, DRX_BUF_FAST_CIRCULAR_BUFSZ);
}

static void
event_exit(void)
{
if (!dr_raw_tls_cfree(tls_offs, 1))
DR_ASSERT(false);

if (!drmgr_unregister_thread_init_event(event_thread_init) ||
!drmgr_unregister_thread_exit_event(event_thread_exit) ||
!drmgr_unregister_tls_field(tls_idx) ||
!drmgr_unregister_bb_insertion_event(event_app_instruction) ||
drreg_exit() != DRREG_SUCCESS)
DR_ASSERT(false);

drx_buf_free(buf);
drmgr_exit();
drx_exit();
}

DR_EXPORT void
Expand All @@ -228,25 +139,17 @@ dr_client_main(client_id_t id, int argc, const char *argv[])
drreg_options_t ops = {sizeof(ops), 2 /*max slots needed*/, false};
dr_set_client_name("DynamoRIO Sample Client 'bbbuf'",
"http://dynamorio.org/issues");
if (!drmgr_init() ||
if (!drmgr_init() || !drx_init() ||
drreg_init(&ops) != DRREG_SUCCESS)
DR_ASSERT(false);

buf = drx_buf_create_circular_buffer(DRX_BUF_FAST_CIRCULAR_BUFSZ);
DR_ASSERT(buf);

/* register events */
dr_register_exit_event(event_exit);
if (!drmgr_register_thread_init_event(event_thread_init) ||
!drmgr_register_thread_exit_event(event_thread_exit) ||
!drmgr_register_bb_instrumentation_event(NULL, event_app_instruction, NULL))
DR_ASSERT(false);

tls_idx = drmgr_register_tls_field();
DR_ASSERT(tls_idx > -1);

/* The TLS field provided by DR cannot be directly accessed from the code cache.
* For better performance, we allocate raw TLS so that we can directly
* access and update it with a single instruction.
*/
if (!dr_raw_tls_calloc(&tls_seg, &tls_offs, 1, 0))
DR_ASSERT(false);
}

5 changes: 4 additions & 1 deletion ext/drx/drx_buf.c
Expand Up @@ -216,7 +216,10 @@ drx_buf_init(drx_buf_type_t bt, size_t bsz,
drvector_append(&clients, new_client);
dr_rwlock_write_unlock(global_buf_rwlock);

if (!any_bufs_created)
/* We don't need the usual setup for buffers if we're using
* the optimized circular buffer.
*/
if (!any_bufs_created && bt != DRX_BUF_CIRCULAR_FAST)
any_bufs_created = true;

return new_client;
Expand Down

0 comments on commit 2380ed9

Please sign in to comment.