From 96bb0e4625c2700ff53f2bbf6bd6c7d0e12e51d2 Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Wed, 1 May 2024 16:18:01 +0100 Subject: [PATCH] avoid unwinding the same stack for wallclock samples --- ddprof-lib/src/main/cpp/counters.h | 3 +- ddprof-lib/src/main/cpp/ctimer_linux.cpp | 6 ++-- ddprof-lib/src/main/cpp/event.h | 3 +- ddprof-lib/src/main/cpp/itimer.cpp | 4 +-- ddprof-lib/src/main/cpp/perfEvents_linux.cpp | 4 +-- ddprof-lib/src/main/cpp/profiler.cpp | 11 ++++-- ddprof-lib/src/main/cpp/profiler.h | 8 +++-- ddprof-lib/src/main/cpp/thread.cpp | 16 --------- ddprof-lib/src/main/cpp/thread.h | 38 +++++++++++++++----- ddprof-lib/src/main/cpp/wallClock.cpp | 17 +++++---- 10 files changed, 65 insertions(+), 45 deletions(-) diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index 5c2c58152..f822dcf5b 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -60,7 +60,8 @@ X(AGCT_NATIVE_NO_JAVA_CONTEXT, "agct_native_no_java_context") \ X(AGCT_BLOCKED_IN_VM, "agct_blocked_in_vm") \ X(HANDLED_SIGSEGV_SAFEFETCH, "handled_sigsegv_safefetch") \ - X(HANDLED_SIGSEGV_WALKVM, "handled_sigsegv_walkvm") + X(HANDLED_SIGSEGV_WALKVM, "handled_sigsegv_walkvm") \ + X(SKIPPED_WALLCLOCK_UNWINDS, "skipped_wallclock_unwinds") #define X_ENUM(a, b) a, typedef enum CounterId : int { DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS diff --git a/ddprof-lib/src/main/cpp/ctimer_linux.cpp b/ddprof-lib/src/main/cpp/ctimer_linux.cpp index 5de2b9a7b..5b9c3b0b7 100644 --- a/ddprof-lib/src/main/cpp/ctimer_linux.cpp +++ b/ddprof-lib/src/main/cpp/ctimer_linux.cpp @@ -193,7 +193,7 @@ void CTimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) { int tid = 0; ProfiledThread* current = ProfiledThread::current(); if (current != NULL) { - current->noteCPUSample(); + current->noteCPUSample(Profiler::instance()->recordingEpoch()); tid = current->tid(); } else { tid = OS::threadId(); @@ -207,8 +207,8 @@ void CTimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) { ? convertJvmExecutionState(vm_thread->state()) : ExecutionMode::JVM; } - Profiler::instance()->recordSample(ucontext, _interval, tid, BCI_CPU, &event); + Profiler::instance()->recordSample(ucontext, _interval, tid, BCI_CPU, 0, &event); Shims::instance().setSighandlerTid(-1); } -#endif // __linux__ \ No newline at end of file +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/event.h b/ddprof-lib/src/main/cpp/event.h index 1bf33ba7e..56e40b24b 100644 --- a/ddprof-lib/src/main/cpp/event.h +++ b/ddprof-lib/src/main/cpp/event.h @@ -40,8 +40,9 @@ class ExecutionEvent : public Event { ThreadState _thread_state; ExecutionMode _execution_mode; u64 _weight; + u32 _call_trace_id; - ExecutionEvent() : Event(), _thread_state(ThreadState::RUNNABLE), _weight(1), _execution_mode(ExecutionMode::UNKNOWN) {} + ExecutionEvent() : Event(), _thread_state(ThreadState::RUNNABLE), _weight(1), _execution_mode(ExecutionMode::UNKNOWN), _call_trace_id(0) {} }; class AllocEvent : public Event { diff --git a/ddprof-lib/src/main/cpp/itimer.cpp b/ddprof-lib/src/main/cpp/itimer.cpp index 79ce9a2b5..759d0fada 100644 --- a/ddprof-lib/src/main/cpp/itimer.cpp +++ b/ddprof-lib/src/main/cpp/itimer.cpp @@ -32,7 +32,7 @@ void ITimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) { int tid = 0; ProfiledThread* current = ProfiledThread::current(); if (current != NULL) { - current->noteCPUSample(); + current->noteCPUSample(Profiler::instance()->recordingEpoch()); tid = current->tid(); } else { tid = OS::threadId(); @@ -46,7 +46,7 @@ void ITimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) { ? convertJvmExecutionState(vm_thread->state()) : ExecutionMode::JVM; } - Profiler::instance()->recordSample(ucontext, _interval, tid, BCI_CPU, &event); + Profiler::instance()->recordSample(ucontext, _interval, tid, BCI_CPU, 0, &event); Shims::instance().setSighandlerTid(-1); } diff --git a/ddprof-lib/src/main/cpp/perfEvents_linux.cpp b/ddprof-lib/src/main/cpp/perfEvents_linux.cpp index 1d466ed8b..978a9e622 100644 --- a/ddprof-lib/src/main/cpp/perfEvents_linux.cpp +++ b/ddprof-lib/src/main/cpp/perfEvents_linux.cpp @@ -705,7 +705,7 @@ void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) { ProfiledThread* current = ProfiledThread::current(); if (current != NULL) { - current->noteCPUSample(); + current->noteCPUSample(Profiler::instance()->recordingEpoch()); } int tid = current != NULL ? current->tid() : OS::threadId(); if (_enabled) { @@ -719,7 +719,7 @@ void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) { ? convertJvmExecutionState(vm_thread->state()) : ExecutionMode::JVM; } - Profiler::instance()->recordSample(ucontext, counter, tid, BCI_CPU, &event); + Profiler::instance()->recordSample(ucontext, counter, tid, BCI_CPU, 0, &event); Shims::instance().setSighandlerTid(-1); } else { resetBuffer(tid); diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index d2570cf63..b077ea29c 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -647,7 +647,7 @@ void Profiler::recordExternalSample(u64 counter, int tid, jvmtiFrameInfo *jvmti_ _locks[lock_index].unlock(); } -void Profiler::recordSample(void* ucontext, u64 counter, int tid, jint event_type, Event* event) { +void Profiler::recordSample(void* ucontext, u64 counter, int tid, jint event_type, u32 call_trace_id, Event* event) { atomicInc(_total_samples); u32 lock_index = getLockIndex(tid); @@ -668,8 +668,8 @@ void Profiler::recordSample(void* ucontext, u64 counter, int tid, jint event_typ bool truncated = false; // in lightweight mode we're just sampling the the context associated with the passage of CPU or wall time, // we use the same event definitions but we record a null stacktrace - u32 call_trace_id = 0; - if (!_omit_stacktraces) { + // we can skip the unwind if we've got a call_trace_id determined to be reusable at a higher level + if (!_omit_stacktraces && call_trace_id == 0) { ASGCT_CallFrame *frames = _calltrace_buffer[lock_index]->_asgct_frames; int num_frames = 0; @@ -703,6 +703,10 @@ void Profiler::recordSample(void* ucontext, u64 counter, int tid, jint event_typ } call_trace_id = _call_trace_storage.put(num_frames, frames, truncated, counter); + ProfiledThread* thread = ProfiledThread::current(); + if (thread != nullptr) { + thread->recordCallTraceId(call_trace_id); + } } _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event, counter); @@ -1252,6 +1256,7 @@ Error Profiler::dump(const char* path, const int length) { lockAll(); Error err = _jfr.dump(path, length); + _epoch++; // Reset calltrace storage if (!_omit_stacktraces) { diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 8be2898e0..3f4b77f17 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -127,7 +127,7 @@ class Profiler { time_t _start_time; time_t _stop_time; - int _epoch; + u32 _epoch; WaitableMutex _timer_lock; void* _timer_id; @@ -252,6 +252,10 @@ class Profiler { } } + inline u32 recordingEpoch() { + return _epoch; + } + Error run(Arguments& args); Error runInternal(Arguments& args, std::ostream& out); Error restart(Arguments& args); @@ -263,7 +267,7 @@ class Profiler { Error dump(const char* path, const int length); void switchThreadEvents(jvmtiEventMode mode); int convertNativeTrace(int native_frames, const void** callchain, ASGCT_CallFrame* frames); - void recordSample(void* ucontext, u64 counter, int tid, jint event_type, Event* event); + void recordSample(void* ucontext, u64 counter, int tid, jint event_type, u32 call_trace_id, Event* event); void recordExternalSample(u64 counter, int tid, jvmtiFrameInfo *jvmti_frames, jint num_jvmti_frames, bool truncated, jint event_type, Event* event); void recordExternalSample(u64 counter, int tid, int num_frames, ASGCT_CallFrame* frames, bool truncated, jint event_type, Event* event); void recordWallClockEpoch(int tid, WallClockEpochEvent* event); diff --git a/ddprof-lib/src/main/cpp/thread.cpp b/ddprof-lib/src/main/cpp/thread.cpp index d291c858e..dd59ce20a 100644 --- a/ddprof-lib/src/main/cpp/thread.cpp +++ b/ddprof-lib/src/main/cpp/thread.cpp @@ -158,22 +158,6 @@ void ProfiledThread::releaseFromBuffer() { } } -bool ProfiledThread::noteWallSample(u64 context_key, u64* skipped_samples) { - if (_wall_epoch == _cpu_epoch && _context_key == context_key) { - *skipped_samples = ++_skipped_samples; - if (_skipped_samples % 10 != 0) { - _skipped_samples = 0; - return true; - } - return false; - } - _context_key = context_key; - _wall_epoch = _cpu_epoch; - *skipped_samples = _skipped_samples; - _skipped_samples = 0; - return true; -} - int ProfiledThread::currentTid() { ProfiledThread* tls = current(); if (tls != NULL) { diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index f2b5b714f..b2df07ce1 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -26,10 +26,12 @@ class ProfiledThread { int _buffer_pos; int _tid; - u64 _cpu_epoch; - u64 _wall_epoch; - u64 _skipped_samples; - u64 _context_key; + u32 _cpu_epoch; + u32 _wall_epoch; + u64 _pc; + u32 _call_trace_id; + u32 _recording_epoch; + u64 _span_id; bool _unwinding_java; ProfiledThread(int buffer_pos, int tid) : @@ -37,8 +39,10 @@ class ProfiledThread { _tid(tid), _cpu_epoch(0), _wall_epoch(0), - _skipped_samples(0), - _context_key(0), + _pc(0), + _call_trace_id(0), + _recording_epoch(0), + _span_id(0), _unwinding_java(false){}; void releaseFromBuffer(); @@ -68,10 +72,28 @@ class ProfiledThread { return _tid; } - inline u64 noteCPUSample() { + inline u64 noteCPUSample(u32 recording_epoch) { + _recording_epoch = recording_epoch; return ++_cpu_epoch; } - bool noteWallSample(u64 context_key, u64* skipped_samples); + + u32 lookupWallclockCallTraceId(u64 pc, u32 recording_epoch, u64 span_id) { + if (_wall_epoch == _cpu_epoch + && _pc == pc + && _span_id == span_id + && _recording_epoch == recording_epoch + && _call_trace_id != 0) { + return _call_trace_id; + } + _wall_epoch = _cpu_epoch; + _pc = pc; + _recording_epoch = recording_epoch; + return 0; + } + + inline void recordCallTraceId(u32 call_trace_id) { + _call_trace_id = call_trace_id; + } static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext); }; diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 50951cc7d..db5e9a272 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -60,11 +60,14 @@ void WallClock::signalHandler(int signo, siginfo_t* siginfo, void* ucontext, u64 ProfiledThread* current = ProfiledThread::current(); int tid = current != NULL ? current->tid() : OS::threadId(); Shims::instance().setSighandlerTid(tid); - Context& ctx = Contexts::get(tid); - u64 skipped = 0; - if (current != NULL) { - if (_collapsing && !current->noteWallSample(ctx.spanId, &skipped)) { - return; + u32 call_trace_id = 0; + if (current != NULL && _collapsing) { + StackFrame frame(ucontext); + Context& context = Contexts::get(tid); + call_trace_id = current->lookupWallclockCallTraceId((u64) frame.pc(), Profiler::instance()->recordingEpoch(), + context.spanId); + if (call_trace_id != 0) { + Counters::increment(SKIPPED_WALLCLOCK_UNWINDS); } } @@ -91,8 +94,8 @@ void WallClock::signalHandler(int signo, siginfo_t* siginfo, void* ucontext, u64 } event._thread_state = state; event._execution_mode = mode; - event._weight = skipped + 1; - Profiler::instance()->recordSample(ucontext, last_sample, tid, BCI_WALL, &event); + event._weight = 1; + Profiler::instance()->recordSample(ucontext, last_sample, tid, BCI_WALL, call_trace_id, &event); Shims::instance().setSighandlerTid(-1); }