27 changes: 20 additions & 7 deletions ddprof-lib/src/main/cpp/arch_dd.h
@@ -14,19 +14,32 @@ static inline long long atomicInc(volatile long long &var,
   return __sync_fetch_and_add(&var, increment);
 }

-static inline u64 loadAcquire(volatile u64 &var) {
-  return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
+template <typename T>
+static inline long long atomicIncRelaxed(volatile T &var,
+                                         T increment = 1) {
+  return __atomic_fetch_add(&var, increment, __ATOMIC_RELAXED);
 }

-static inline size_t loadAcquire(volatile size_t &var) {
-  return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
+// Atomic load/store (unordered)
+template <typename T>
+static inline T load(volatile T& var) {
+  return __atomic_load_n(&var, __ATOMIC_RELAXED);
 }

-static inline void storeRelease(volatile long long &var, long long value) {
-  return __atomic_store_n(&var, value, __ATOMIC_RELEASE);
+template <typename T>
+static inline void store(volatile T& var, T value) {
+  return __atomic_store_n(&var, value, __ATOMIC_RELAXED);
 }

+
+// Atomic load-acquire/release-store
+template <typename T>
+static inline T loadAcquire(volatile T& var) {
+  return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
+}
+
-static inline void storeRelease(volatile size_t &var, size_t value) {
+template <typename T>
+static inline void storeRelease(volatile T& var, T value) {
   return __atomic_store_n(&var, value, __ATOMIC_RELEASE);
 }

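The new helpers split plain relaxed accesses (atomicIncRelaxed, load, store) from the ordering-sensitive ones (loadAcquire, storeRelease). A minimal usage sketch, assuming arch_dd.h is included; the counter name and the u64 alias below are illustrative, not part of this PR:

#include <cstdint>
typedef uint64_t u64;  // assumed to mirror the project's u64 typedef

static volatile u64 g_dropped_samples = 0;  // hypothetical statistics counter

void onDroppedSample() {
  // Relaxed fetch-add: the counter is pure statistics and guards no other data.
  atomicIncRelaxed(g_dropped_samples, u64(1));
}

u64 snapshotDroppedSamples() {
  // Relaxed load: a slightly stale value is acceptable for reporting.
  return load(g_dropped_samples);
}

loadAcquire/storeRelease remain available for fields whose readers must also observe earlier writes.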
4 changes: 2 additions & 2 deletions ddprof-lib/src/main/cpp/callTraceHashTable.cpp
@@ -301,7 +301,7 @@ u64 CallTraceHashTable::put(int num_frames, ASGCT_CallFrame *frames,

     if (++step >= capacity) {
       // Very unlikely case of a table overflow
-      atomicInc(_overflow);
+      atomicIncRelaxed(_overflow);
       return OVERFLOW_TRACE_ID;
     }
     // Improved version of linear probing
@@ -359,7 +359,7 @@ void CallTraceHashTable::collectAndCopySelective(std::unordered_set<CallTrace *>
     traces.insert(&_overflow_trace);
     if (trace_ids_to_preserve.find(OVERFLOW_TRACE_ID) != trace_ids_to_preserve.end()) {
       // Copy overflow trace to target - it's a static trace so just increment overflow counter
-      atomicInc(target->_overflow);
+      atomicIncRelaxed(target->_overflow);
     }
   }
 }
2 changes: 1 addition & 1 deletion ddprof-lib/src/main/cpp/callTraceHashTable.h
@@ -52,7 +52,7 @@ class CallTraceHashTable {

   LinearAllocator _allocator;
   LongHashTable *_current_table;
-  u64 _overflow;
+  volatile u64 _overflow;

   u64 calcHash(int num_frames, ASGCT_CallFrame *frames, bool truncated);
   CallTrace *storeCallTrace(int num_frames, ASGCT_CallFrame *frames,
6 changes: 3 additions & 3 deletions ddprof-lib/src/main/cpp/counters.h
@@ -124,9 +124,9 @@ class Counters {
   static void increment(CounterId counter, long long delta = 1,
                         int offset = 0) {
 #ifdef COUNTERS
-    atomicInc(Counters::instance()
-                  ._counters[address(static_cast<int>(counter) + offset)],
-              delta);
+    atomicIncRelaxed(Counters::instance()
+                         ._counters[address(static_cast<int>(counter) + offset)],
+                     delta);
 #endif // COUNTERS
   }

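With this change a counter bump compiles to a relaxed fetch-add when the library is built with the COUNTERS flag, and to nothing otherwise. A hedged call-site sketch; the specific CounterId value is hypothetical:

// Hypothetical call site: record that one sample was skipped.
// No-op unless the build defines COUNTERS.
Counters::increment(CounterId::SAMPLES_SKIPPED /* hypothetical id */, 1);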
12 changes: 6 additions & 6 deletions ddprof-lib/src/main/cpp/livenessTracker.cpp
@@ -26,12 +26,12 @@ constexpr int LivenessTracker::MAX_TRACKING_TABLE_SIZE;
 constexpr int LivenessTracker::MIN_SAMPLING_INTERVAL;

 void LivenessTracker::cleanup_table(bool forced) {
-  u64 current = loadAcquire(_last_gc_epoch);
-  u64 target_gc_epoch = loadAcquire(_gc_epoch);
+  u64 current = load(_last_gc_epoch);
+  u64 target_gc_epoch = load(_gc_epoch);

   if ((target_gc_epoch == _last_gc_epoch ||
-       !__sync_bool_compare_and_swap(&_last_gc_epoch, current,
-                                     target_gc_epoch)) &&
+       !__atomic_compare_exchange_n(&_last_gc_epoch, &current,
+                                    target_gc_epoch, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) &&
       !forced) {
     // if the last processed GC epoch hasn't changed, or if we failed to update
     // it, there's nothing to do
@@ -383,10 +383,10 @@ void LivenessTracker::onGC() {
   }

   // just increment the epoch
-  atomicInc(_gc_epoch, 1);
+  atomicIncRelaxed(_gc_epoch,u64(1));

   if (!ddprof::HeapUsage::isLastGCUsageSupported()) {
-    storeRelease(_used_after_last_gc, ddprof::HeapUsage::get(false)._used);
+    store(_used_after_last_gc, ddprof::HeapUsage::get(false)._used);
   }
 }

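The epoch update swaps __sync_bool_compare_and_swap for __atomic_compare_exchange_n with relaxed ordering. One behavioural difference worth keeping in mind: the __atomic builtin takes the expected value by address and overwrites it with the observed value when the exchange fails, which is why current is now passed as &current above. A standalone sketch of the two forms (names are illustrative only):

#include <cstdint>

static volatile uint64_t g_epoch = 0;

// Legacy builtin: implies a full barrier; 'expected' is passed by value and left untouched.
bool casFullBarrier(uint64_t expected, uint64_t desired) {
  return __sync_bool_compare_and_swap(&g_epoch, expected, desired);
}

// C++11-style builtin: ordering is explicit (relaxed here); on failure 'expected'
// is rewritten with the value actually read, so a caller can retry without reloading.
bool casRelaxed(uint64_t &expected, uint64_t desired) {
  return __atomic_compare_exchange_n(&g_epoch, &expected, desired,
                                     /*weak=*/false,
                                     __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}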
18 changes: 9 additions & 9 deletions ddprof-lib/src/main/cpp/profiler.cpp
@@ -559,7 +559,7 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames,
     return 0;
   }

-  atomicInc(_failures[-trace.num_frames]);
+  atomicIncRelaxed(_failures[-trace.num_frames]);
   trace.frames->bci = BCI_ERROR;
   trace.frames->method_id = (jmethodID)err_string;
   return trace.frames - frames + 1;
@@ -607,14 +607,14 @@ void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
 }

 u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event_type, Event *event, bool deferred) {
-  atomicInc(_total_samples);
+  atomicIncRelaxed(_total_samples);

   u32 lock_index = getLockIndex(tid);
   if (!_locks[lock_index].tryLock() &&
       !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
       !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
     // Too many concurrent signals already
-    atomicInc(_failures[-ticks_skipped]);
+    atomicIncRelaxed(_failures[-ticks_skipped]);

     return 0;
   }
@@ -654,14 +654,14 @@ u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event
 }

 void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type, Event *event) {
-  atomicInc(_total_samples);
+  atomicIncRelaxed(_total_samples);

   u32 lock_index = getLockIndex(tid);
   if (!_locks[lock_index].tryLock() &&
       !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
       !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
     // Too many concurrent signals already
-    atomicInc(_failures[-ticks_skipped]);
+    atomicIncRelaxed(_failures[-ticks_skipped]);
     return;
   }

@@ -672,14 +672,14 @@ void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type,

 void Profiler::recordSample(void *ucontext, u64 counter, int tid,
                             jint event_type, u64 call_trace_id, Event *event) {
-  atomicInc(_total_samples);
+  atomicIncRelaxed(_total_samples);

   u32 lock_index = getLockIndex(tid);
   if (!_locks[lock_index].tryLock() &&
       !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
       !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
     // Too many concurrent signals already
-    atomicInc(_failures[-ticks_skipped]);
+    atomicIncRelaxed(_failures[-ticks_skipped]);

     if (event_type == BCI_CPU && _cpu_engine == &perf_events) {
       // Need to reset PerfEvents ring buffer, even though we discard the
@@ -788,7 +788,7 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) {
 void Profiler::recordExternalSample(u64 weight, int tid, int num_frames,
                                     ASGCT_CallFrame *frames, bool truncated,
                                     jint event_type, Event *event) {
-  atomicInc(_total_samples);
+  atomicIncRelaxed(_total_samples);

   u64 call_trace_id =
       _call_trace_storage.put(num_frames, frames, truncated, weight);
@@ -798,7 +798,7 @@ void Profiler::recordExternalSample(u64 weight, int tid, int num_frames,
       !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
       !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
     // Too many concurrent signals already
-    atomicInc(_failures[-ticks_skipped]);
+    atomicIncRelaxed(_failures[-ticks_skipped]);
     return;


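Every record*Sample path above follows the same shape: bump _total_samples with a relaxed increment, then probe up to three lock stripes before giving up and counting the drop, since a signal handler must never block. A self-contained sketch of that pattern under assumed names and types (not the profiler's real API):

#include <atomic>
#include <cstdint>

constexpr int kConcurrencyLevel = 16;  // assumed stripe count

struct TrySpinLock {
  std::atomic<bool> locked{false};
  bool tryLock() { return !locked.exchange(true, std::memory_order_acquire); }
  void unlock() { locked.store(false, std::memory_order_release); }
};

static TrySpinLock g_locks[kConcurrencyLevel];
static volatile long long g_total_samples = 0;
static volatile long long g_skipped = 0;

bool recordSampleSketch(int tid) {
  __atomic_fetch_add(&g_total_samples, 1LL, __ATOMIC_RELAXED);  // statistics only

  uint32_t i = static_cast<uint32_t>(tid) % kConcurrencyLevel;
  // Probe the home stripe and two neighbours; never block in a signal handler.
  if (!g_locks[i].tryLock() &&
      !g_locks[i = (i + 1) % kConcurrencyLevel].tryLock() &&
      !g_locks[i = (i + 2) % kConcurrencyLevel].tryLock()) {
    __atomic_fetch_add(&g_skipped, 1LL, __ATOMIC_RELAXED);  // dropped sample
    return false;
  }
  // ... write the sample into the per-stripe buffer here ...
  g_locks[i].unlock();
  return true;
}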
2 changes: 1 addition & 1 deletion ddprof-lib/src/main/cpp/profiler.h
@@ -93,7 +93,7 @@ class Profiler {
   WaitableMutex _timer_lock;
   void *_timer_id;

-  u64 _total_samples;
+  volatile u64 _total_samples;
   u64 _failures[ASGCT_FAILURE_TYPES];

   SpinLock _class_map_lock;