Skip to content

Commit

Permalink
i#2039 trace trim, part 4: Clamp timestamps
Browse files Browse the repository at this point in the history
Moves the timestamp+cpu headers to be added at buffer creation time
instead of at buffer output time.

Makes instru_t::frozen_timestamp_ use release-acquire semantics and
sets it for -align_endpoints at detach time.

Adds a 'minimum timestamp' recorded at post-attach time.  At buffer
output time, we examine the buffer's timestamp and if it was from
before the minimum, we replace it with the current (not the minimum)
timestamp.

Tested on a large app where these changes result in much tighter
timestamps matching the burst period.

Issue: #2039
  • Loading branch information
derekbruening committed Oct 25, 2022
1 parent df5ab9f commit 8f2123b
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 40 deletions.
17 changes: 14 additions & 3 deletions clients/drcachesim/tracer/instru.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

#include <stdint.h>
#include <string.h>
#include <atomic>
#include "dr_api.h"
#include "drvector.h"
#include "trace_entry.h"
Expand Down Expand Up @@ -179,6 +180,7 @@ class instru_t {
, disable_optimizations_(disable_opts)
, instr_size_(instruction_size)
{
frozen_timestamp_.store(0, std::memory_order_release);
}
virtual ~instru_t()
{
Expand Down Expand Up @@ -221,10 +223,15 @@ class instru_t {
// This is a per-buffer-writeout header.
virtual int
append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) = 0;
// If the entry at buf_ptr is not a timestamp, returns false.
// Else, if the timestamp value is < min_timestamp, replaces it with a
// new timestamp (ignoring the frozen timestamp). Returns true either way.
virtual bool
refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) = 0;
virtual void
set_frozen_timestamp(uint64 timestamp)
{
frozen_timestamp_ = timestamp;
frozen_timestamp_.store(timestamp, std::memory_order_release);
}

// These insert inlined code to add an entry into the trace buffer.
Expand Down Expand Up @@ -292,8 +299,8 @@ class instru_t {
drvector_t *reg_vector_;
bool disable_optimizations_;
// Stores a timestamp to use for all future unit headers. This is meant for
// avoiding time gaps for max-limit scenarios (i#5021).
uint64 frozen_timestamp_ = 0;
// avoiding time gaps for -align_endpoints or max-limit scenarios (i#5021).
std::atomic<uint64> frozen_timestamp_;

private:
instru_t()
Expand Down Expand Up @@ -339,6 +346,8 @@ class online_instru_t : public instru_t {
append_thread_header(byte *buf_ptr, thread_id_t tid, offline_file_type_t file_type);
int
append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) override;
bool
refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) override;

int
instrument_memref(void *drcontext, void *bb_field, instrlist_t *ilist, instr_t *where,
Expand Down Expand Up @@ -420,6 +429,8 @@ class offline_instru_t : public instru_t {
append_thread_header(byte *buf_ptr, thread_id_t tid, offline_file_type_t file_type);
int
append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) override;
bool
refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) override;

int
instrument_memref(void *drcontext, void *bb_field, instrlist_t *ilist, instr_t *where,
Expand Down
15 changes: 13 additions & 2 deletions clients/drcachesim/tracer/instru_offline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,15 +397,26 @@ offline_instru_t::append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t w
byte *new_buf = buf_ptr;
offline_entry_t *entry = (offline_entry_t *)new_buf;
entry->timestamp.type = OFFLINE_TYPE_TIMESTAMP;
entry->timestamp.usec =
frozen_timestamp_ != 0 ? frozen_timestamp_ : instru_t::get_timestamp();
uint64 frozen = frozen_timestamp_.load(std::memory_order_acquire);
entry->timestamp.usec = frozen != 0 ? frozen : instru_t::get_timestamp();
new_buf += sizeof(*entry);
if (window >= 0)
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_WINDOW_ID, (uintptr_t)window);
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CPU_ID, instru_t::get_cpu_id());
return (int)(new_buf - buf_ptr);
}

bool
offline_instru_t::refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp)
{
offline_entry_t *stamp = reinterpret_cast<offline_entry_t *>(buf_ptr);
if (stamp->timestamp.type != OFFLINE_TYPE_TIMESTAMP)
return false;
if (stamp->timestamp.usec < min_timestamp)
stamp->timestamp.usec = instru_t::get_timestamp();
return true;
}

int
offline_instru_t::insert_save_entry(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t scratch, int adjust,
Expand Down
21 changes: 16 additions & 5 deletions clients/drcachesim/tracer/instru_online.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,28 @@ online_instru_t::append_unit_header(byte *buf_ptr, thread_id_t tid, intptr_t win
{
byte *new_buf = buf_ptr;
new_buf += append_tid(new_buf, tid);
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_TIMESTAMP,
// Truncated to 32 bits for 32-bit: we live with it.
static_cast<uintptr_t>(frozen_timestamp_ != 0
? frozen_timestamp_
: instru_t::get_timestamp()));
uint64 frozen = frozen_timestamp_.load(std::memory_order_acquire);
new_buf += append_marker(
new_buf, TRACE_MARKER_TYPE_TIMESTAMP,
// Truncated to 32 bits for 32-bit: we live with it.
static_cast<uintptr_t>(frozen != 0 ? frozen : instru_t::get_timestamp()));
if (window >= 0)
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_WINDOW_ID, (uintptr_t)window);
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CPU_ID, instru_t::get_cpu_id());
return (int)(new_buf - buf_ptr);
}

bool
online_instru_t::refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp)
{
trace_entry_t *stamp = reinterpret_cast<trace_entry_t *>(buf_ptr);
if (stamp->type != TRACE_TYPE_MARKER || stamp->size != TRACE_MARKER_TYPE_TIMESTAMP)
return false;
if (stamp->addr < min_timestamp)
stamp->addr = static_cast<uintptr_t>(instru_t::get_timestamp());
return true;
}

void
online_instru_t::insert_save_immed(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t base, reg_id_t scratch, ptr_int_t immed,
Expand Down
80 changes: 50 additions & 30 deletions clients/drcachesim/tracer/output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,15 +445,21 @@ open_new_thread_file(void *drcontext, ptr_int_t window_num)
return opened_new_file;
}

/* Returns the size of just the thread header, but appends both the thread header
* and the buffer header.
*/
static size_t
prepend_offline_thread_header(void *drcontext)
prepend_offline_thread_header(void *drcontext, ptr_int_t window = -1)
{
DR_ASSERT(op_offline.get_value());
/* Write initial headers at the top of the first buffer. */
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
size_t size = reinterpret_cast<offline_instru_t *>(instru)->append_thread_header(
data->buf_base, dr_get_thread_id(drcontext), get_file_type());
BUF_PTR(data->seg_base) = data->buf_base + size + buf_hdr_slots_size;
BUF_PTR(data->seg_base) = data->buf_base + size;
BUF_PTR(data->seg_base) += append_unit_header(
drcontext, BUF_PTR(data->seg_base), dr_get_thread_id(drcontext),
window == -1 ? get_local_window(data) : window);
data->has_thread_header = true;
return size;
}
Expand Down Expand Up @@ -581,7 +587,7 @@ set_local_window(void *drcontext, ptr_int_t value)
}
if ((value > 0 && op_split_windows.get_value()) ||
data->init_header_size == 0) {
size_t header_size = prepend_offline_thread_header(drcontext);
size_t header_size = prepend_offline_thread_header(drcontext, value);
if (data->init_header_size == 0)
data->init_header_size = header_size;
else
Expand Down Expand Up @@ -674,22 +680,6 @@ is_ok_to_split_before(trace_type_t type)
op_L0I_filter.get_value();
}

static size_t
add_buffer_header(void *drcontext, per_thread_t *data, byte *buf_base)
{
size_t header_size = 0;
// For online we already wrote the thread header but for offline it is in
// the first buffer, so skip over it.
if (data->has_thread_header && op_offline.get_value())
header_size = data->init_header_size;
data->has_thread_header = false;
// The initial slots are left empty for the header, which we add here.
header_size +=
append_unit_header(drcontext, buf_base + header_size, dr_get_thread_id(drcontext),
get_local_window(data));
return header_size;
}

static uint
output_buffer(void *drcontext, per_thread_t *data, byte *buf_base, byte *buf_ptr,
size_t header_size)
Expand Down Expand Up @@ -811,9 +801,12 @@ process_entry_for_physaddr(void *drcontext, per_thread_t *data, size_t header_si
ASSERT(size == data->init_header_size, "inconsistent header");
*skip = data->init_header_size;
v2p_ptr += size;
header_size += size;
}
v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr);
// With timestamps at buffer start, we want to use the same timestamp+cpu
// to avoid out-of-order times.
memcpy(v2p_ptr, data->buf_base + header_size - buf_hdr_slots_size,
buf_hdr_slots_size);
v2p_ptr += buf_hdr_slots_size;
*emitted = true;
}
if (v2p_ptr + 2 * instru->sizeof_entry() - data->v2p_buf >=
Expand All @@ -822,7 +815,9 @@ process_entry_for_physaddr(void *drcontext, per_thread_t *data, size_t header_si
data->num_phys_markers +=
output_buffer(drcontext, data, data->v2p_buf, v2p_ptr, header_size);
v2p_ptr = data->v2p_buf;
v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr);
memcpy(v2p_ptr, data->buf_base + header_size - buf_hdr_slots_size,
buf_hdr_slots_size);
v2p_ptr += buf_hdr_slots_size;
}
if (success) {
v2p_ptr +=
Expand Down Expand Up @@ -906,7 +901,6 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap)
byte *mem_ref, *buf_ptr;
byte *redzone;
bool do_write = true;
size_t header_size = 0;
uint current_num_refs = 0;

if (op_offline.get_value() && data->file == INVALID_FILE) {
Expand All @@ -916,10 +910,32 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap)
open_new_thread_file(drcontext, get_local_window(data));
}

size_t header_size = buf_hdr_slots_size;
// For online we already wrote the thread header but for offline it is in
// the first buffer.
if (data->has_thread_header && op_offline.get_value())
header_size += data->init_header_size;
data->has_thread_header = false;

if (align_attach_detach_endpoints()) {
// This is the attach counterpart to instru_t::set_frozen_timestamp(): we place
// timestamps at buffer creation, but that can be before we're fully attached.
// We update any too-early timestamps to reflect when we actually started
// tracing. (Switching back to timestamps at buffer output is actually
// worse as we then have the identical frozen timestamp for all the flushes
// during detach, plus they are all on the same cpu too.)
size_t stamp_offs =
header_size > buf_hdr_slots_size ? header_size - buf_hdr_slots_size : 0;
uint64 min_timestamp = attached_timestamp.load(std::memory_order_acquire);
bool found = instru->refresh_unit_header_timestamp(data->buf_base + stamp_offs,
min_timestamp);
DR_ASSERT(found);
}

buf_ptr = BUF_PTR(data->seg_base);
// We may get called with nothing to write: e.g., on a syscall for
// -L0I_filter and -L0D_filter.
if (buf_ptr == data->buf_base + header_size + buf_hdr_slots_size) {
if (buf_ptr == data->buf_base + header_size) {
if (has_tracing_windows()) {
// If there is no data to write, we do not emit an empty header here
// unless this thread is exiting (set_local_window will also write out
Expand All @@ -930,8 +946,6 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap)
return;
}

header_size = add_buffer_header(drcontext, data, data->buf_base);

bool window_changed = false;
if (has_tracing_windows() &&
get_local_window(data) != tracing_window.load(std::memory_order_acquire)) {
Expand Down Expand Up @@ -1021,7 +1035,6 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap)
memset(redzone, -1, buf_ptr - redzone);
}
}
BUF_PTR(data->seg_base) = data->buf_base + buf_hdr_slots_size;
num_refs_racy += current_num_refs;
if (op_exit_after_tracing.get_value() > 0 &&
num_refs_racy > op_exit_after_tracing.get_value()) {
Expand All @@ -1038,9 +1051,14 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap)
}
dr_mutex_unlock(mutex);
}
ptr_int_t window = -1;
if (has_tracing_windows()) {
set_local_window(drcontext, tracing_window.load(std::memory_order_acquire));
window = tracing_window.load(std::memory_order_acquire);
set_local_window(drcontext, window);
}
BUF_PTR(data->seg_base) = data->buf_base +
append_unit_header(drcontext, data->buf_base, dr_get_thread_id(drcontext),
window);
}

void
Expand Down Expand Up @@ -1106,8 +1124,10 @@ init_thread_io(void *drcontext)
write_trace_data(drcontext, (byte *)buf, proc_info, get_local_window(data));

/* put buf_base to TLS plus header slots as starting buf_ptr */
data->init_header_size = buf_hdr_slots_size;
BUF_PTR(data->seg_base) = data->buf_base + buf_hdr_slots_size;
data->init_header_size =
append_unit_header(drcontext, data->buf_base, dr_get_thread_id(drcontext),
get_local_window(data));
BUF_PTR(data->seg_base) = data->buf_base + data->init_header_size;
}
}

Expand Down
9 changes: 9 additions & 0 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ size_t trace_buf_size;
size_t redzone_size;
size_t max_buf_size;

std::atomic<uint64> attached_timestamp;

static drvector_t scratch_reserve_vec;

/* per bb user data during instrumentation */
Expand Down Expand Up @@ -413,6 +415,7 @@ event_post_attach()
DR_ASSERT(attached_midway);
if (!align_attach_detach_endpoints())
return;
attached_timestamp.store(instru_t::get_timestamp(), std::memory_order_release);
if (op_trace_after_instrs.get_value() != 0) {
NOTIFY(1, "Switching to counting mode after attach\n");
tracing_mode.store(BBDUP_MODE_COUNT, std::memory_order_release);
Expand All @@ -427,6 +430,12 @@ event_pre_detach()
{
if (align_attach_detach_endpoints()) {
NOTIFY(1, "Switching to no-tracing mode during detach\n");
// Keep all final thread output at the detach timestamp. We won't add to any
// buffers during detach and we now have timestamps added at buffer start
// instead of output but a non-omitted thread that is at a wait syscall
// and so has an empty buffer would have its thread exit with a post-detach
// timestamp without this freeze.
instru->set_frozen_timestamp(instru_t::get_timestamp());
tracing_mode.store(BBDUP_MODE_NOP, std::memory_order_release);
}
}
Expand Down
1 change: 1 addition & 0 deletions clients/drcachesim/tracer/tracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ is_first_nonlabel(void *drcontext, instr_t *instr);
extern std::atomic<ptr_int_t> tracing_mode;
extern std::atomic<ptr_int_t> tracing_window;
extern bool attached_midway;
extern std::atomic<uint64> attached_timestamp;

/* We have multiple modes. While just 2 results in a more efficient dispatch,
* the power of extra modes justifies the extra overhead.
Expand Down

0 comments on commit 8f2123b

Please sign in to comment.