Skip to content

Commit f4fb779

Browse files
authored
[Profiler] Implement sampling for exceptions (DataDog#2823)
1 parent 09a7ea7 commit f4fb779

29 files changed

+800
-12
lines changed

profiler/build/crank/Samples.AspNetCoreSimpleController.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,20 @@ scenarios:
3333
serverPort: 5000
3434
path: /hello
3535

36+
profiler_exceptions_baseline:
37+
application:
38+
job: server
39+
environmentVariables:
40+
COR_ENABLE_PROFILING: 0
41+
CORECLR_ENABLE_PROFILING: 0
42+
load:
43+
job: bombardier
44+
variables:
45+
warmup: 30
46+
duration: 240
47+
serverPort: 5000
48+
path: /hello/Exception
49+
3650
profiler:
3751
application:
3852
job: server

profiler/build/crank/run.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ if [ "$1" = "windows" ]; then
3838
dd-trace --crank-import="profiler_windows.json"
3939
rm profiler_windows.json
4040

41+
crank --config Samples.AspNetCoreSimpleController.yml --scenario profiler_exceptions_baseline --profile windows --json profiler_exceptions_baseline_windows.json $repository $commit --property name=AspNetCoreSimpleController --property scenario=profiler_exceptions_baseline --property profile=windows --property arch=x64 --variable commit_hash=$commit_sha
42+
dd-trace --crank-import="profiler_exceptions_baseline_windows.json"
43+
rm profiler_exceptions_baseline_windows.json
44+
4145
crank --config Samples.AspNetCoreSimpleController.yml --scenario profiler_exceptions --profile windows --json profiler_exceptions_windows.json $repository $commit --property name=AspNetCoreSimpleController --property scenario=profiler_exceptions --property profile=windows --property arch=x64 --variable commit_hash=$commit_sha
4246
dd-trace --crank-import="profiler_exceptions_windows.json"
4347
rm profiler_exceptions_windows.json
@@ -53,6 +57,10 @@ elif [ "$1" = "linux" ]; then
5357
dd-trace --crank-import="profiler_linux.json"
5458
rm profiler_linux.json
5559

60+
crank --config Samples.AspNetCoreSimpleController.yml --scenario profiler_exceptions_baseline --profile linux --json profiler_exceptions_baseline_linux.json $repository $commit --property name=AspNetCoreSimpleController --property scenario=profiler_exceptions_baseline --property profile=linux --property arch=x64 --variable commit_hash=$commit_sha
61+
dd-trace --crank-import="profiler_exceptions_baseline_linux.json"
62+
rm profiler_exceptions_baseline_linux.json
63+
5664
crank --config Samples.AspNetCoreSimpleController.yml --scenario profiler_exceptions --profile linux --json profiler_exceptions_linux.json $repository $commit --property name=AspNetCoreSimpleController --property scenario=profiler_exceptions --property profile=linux --property arch=x64 --variable commit_hash=$commit_sha
5765
dd-trace --crank-import="profiler_exceptions_linux.json"
5866
rm profiler_exceptions_linux.json

profiler/src/Demos/Samples.ExceptionGenerator/ExceptionGenerator.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ internal class ExceptionGenerator
1515
private const bool PrintExceptionsToConsole = false;
1616

1717
private static readonly TimeSpan StatsPeriodDuration = TimeSpan.FromSeconds(5);
18+
private static int _count;
1819
private readonly Thread _thread;
1920
private volatile bool _isStopped;
20-
private static int _count;
2121

2222
public ExceptionGenerator()
2323
{

profiler/src/Demos/Samples.ExceptionGenerator/Program.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ namespace Samples.ExceptionGenerator
1313
public enum Scenario
1414
{
1515
ExceptionsProfilerTest = 1,
16-
ParallelExceptions = 2
16+
ParallelExceptions = 2,
17+
Sampling = 3
1718
}
1819

1920
public class Program
@@ -56,6 +57,14 @@ public static void Main(string[] args)
5657
Thread.Sleep(20_000);
5758
break;
5859

60+
case Scenario.Sampling:
61+
new SamplingScenario().Run();
62+
63+
// TODO: Remove the sleep when flush on shutdown is implemented in the profiler
64+
Console.WriteLine(" ########### Sleeping for 20 seconds");
65+
Thread.Sleep(20_000);
66+
break;
67+
5968
default:
6069
Console.WriteLine($" ########### Unknown scenario: {scenario}.");
6170
break;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// <copyright file="SamplingScenario.cs" company="Datadog">
2+
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
3+
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc.
4+
// </copyright>
5+
6+
using System;
7+
8+
namespace Samples.ExceptionGenerator
9+
{
10+
/// <summary>
/// Sample scenario: runs <see cref="ParallelExceptionsScenario"/> and then throws a single
/// <see cref="InvalidOperationException"/>.
/// </summary>
internal class SamplingScenario
{
    /// <summary>
    /// Executes the two phases of the scenario one after the other.
    /// </summary>
    public void Run()
    {
        // Phase 1: throw a burst of exceptions (4000 according to the original note;
        // the actual count is decided by ParallelExceptionsScenario, defined elsewhere).
        var burst = new ParallelExceptionsScenario();
        burst.Run();

        // Phase 2: throw one exception of a type that, per the original note,
        // has not been seen during phase 1.
        try
        {
            throw new InvalidOperationException("OK");
        }
        catch
        {
            // Deliberately ignored: the only purpose is to raise the exception.
        }
    }
}
27+
}

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Datadog.Profiler.Native.Windows.vcxproj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@
100100
<ClCompile>
101101
<WarningLevel>Level3</WarningLevel>
102102
<SDLCheck>true</SDLCheck>
103-
<PreprocessorDefinitions>WIN32;_DEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
103+
<PreprocessorDefinitions>WIN32;_DEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
104104
<ConformanceMode>true</ConformanceMode>
105105
<PrecompiledHeader>NotUsing</PrecompiledHeader>
106106
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
@@ -130,7 +130,7 @@
130130
<FunctionLevelLinking>true</FunctionLevelLinking>
131131
<IntrinsicFunctions>true</IntrinsicFunctions>
132132
<SDLCheck>true</SDLCheck>
133-
<PreprocessorDefinitions>WIN32;NDEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
133+
<PreprocessorDefinitions>WIN32;NDEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
134134
<ConformanceMode>true</ConformanceMode>
135135
<PrecompiledHeader>NotUsing</PrecompiledHeader>
136136
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
@@ -160,7 +160,7 @@
160160
<ClCompile>
161161
<WarningLevel>Level3</WarningLevel>
162162
<SDLCheck>true</SDLCheck>
163-
<PreprocessorDefinitions>BIT64;_DEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
163+
<PreprocessorDefinitions>BIT64;_DEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
164164
<ConformanceMode>true</ConformanceMode>
165165
<PrecompiledHeader>NotUsing</PrecompiledHeader>
166166
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
@@ -190,7 +190,7 @@
190190
<FunctionLevelLinking>true</FunctionLevelLinking>
191191
<IntrinsicFunctions>true</IntrinsicFunctions>
192192
<SDLCheck>true</SDLCheck>
193-
<PreprocessorDefinitions>BIT64;NDEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
193+
<PreprocessorDefinitions>BIT64;NDEBUG;DATADOGAUTOINSTRUMENTATIONPROFILERNATIVEWINDOWS_EXPORTS;_WINDOWS;_USRDLL;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
194194
<ConformanceMode>true</ConformanceMode>
195195
<PrecompiledHeader>NotUsing</PrecompiledHeader>
196196
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
2+
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc.
3+
4+
#ifdef _WINDOWS
5+
6+
#include "Timer.h"
7+
8+
Timer::Timer(std::function<void()> callback, std::chrono::milliseconds period) :
9+
_callback(std::move(callback)),
10+
_period(period),
11+
_internalTimer(nullptr)
12+
{
13+
}
14+
15+
// Stops and releases the thread-pool timer if one was created.
Timer::~Timer()
{
    if (_internalTimer == nullptr)
    {
        // Nothing to release.
        return;
    }

    // Disarm the timer, then cancel pending callbacks and wait for outstanding ones,
    // so that no callback is executing when the timer is closed. See the remarks in
    // https://docs.microsoft.com/en-us/windows/win32/api/threadpoolapiset/nf-threadpoolapiset-closethreadpooltimer#remarks
    SetThreadpoolTimer(_internalTimer, nullptr, 0, 0);
    WaitForThreadpoolTimerCallbacks(_internalTimer, true);

    CloseThreadpoolTimer(_internalTimer);
    _internalTimer = nullptr;
}
29+
30+
void Timer::Start()
31+
{
32+
_internalTimer = CreateThreadpoolTimer(&OnTick, &_callback, nullptr);
33+
34+
ULARGE_INTEGER rawDueTime;
35+
rawDueTime.QuadPart = _period.count() * -1 * 10 /* microseconds */ * 1000 /* milliseconds */;
36+
37+
FILETIME dueTime;
38+
dueTime.dwHighDateTime = rawDueTime.HighPart;
39+
dueTime.dwLowDateTime = rawDueTime.LowPart;
40+
41+
SetThreadpoolTimer(_internalTimer, &dueTime, _period.count(), 100);
42+
}
43+
44+
// Thread-pool timer callback. `Context` is the pointer registered in Start(), i.e. the
// address of the std::function to invoke; `Instance` and `Timer` are not used.
void NTAPI Timer::OnTick(
    PTP_CALLBACK_INSTANCE Instance,
    PVOID Context,
    PTP_TIMER Timer)
{
    auto* const tickHandler = static_cast<std::function<void()>*>(Context);
    (*tickHandler)();
}
52+
53+
#endif
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
#include "AdaptiveSampler.h"
2+
3+
#include "Log.h"
4+
5+
#include <algorithm>
6+
#include <stdexcept>
7+
8+
void AdaptiveSampler::Counts::AddTest()
9+
{
10+
_testCount.fetch_add(1);
11+
}
12+
13+
bool AdaptiveSampler::Counts::AddSample(int64_t limit)
14+
{
15+
int64_t previousValue;
16+
int64_t newValue;
17+
18+
do
19+
{
20+
previousValue = _sampleCount.load();
21+
newValue = std::min<int64_t>(previousValue + 1, limit);
22+
} while (!_sampleCount.compare_exchange_strong(previousValue, newValue));
23+
24+
return newValue < limit;
25+
}
26+
27+
void AdaptiveSampler::Counts::AddSample()
28+
{
29+
_sampleCount.fetch_add(1);
30+
}
31+
32+
void AdaptiveSampler::Counts::Reset()
33+
{
34+
_testCount = 0;
35+
_sampleCount = 0;
36+
}
37+
38+
int64_t AdaptiveSampler::Counts::SampleCount()
39+
{
40+
return _sampleCount;
41+
}
42+
43+
int64_t AdaptiveSampler::Counts::TestCount()
44+
{
45+
return _testCount;
46+
}
47+
48+
AdaptiveSampler::AdaptiveSampler(
49+
std::chrono::milliseconds windowDuration,
50+
int32_t samplesPerWindow,
51+
int32_t averageLookback,
52+
int32_t budgetLookback,
53+
std::function<void()> rollWindowCallback) :
54+
_timer([this] { RollWindow(); }, windowDuration),
55+
_totalCountRunningAverage(0),
56+
_rollWindowCallback(std::move(rollWindowCallback)),
57+
_avgSamples(0),
58+
_countsSlots{}
59+
{
60+
if (averageLookback < 1)
61+
{
62+
Log::Error("AdaptiveSampler: 'averageLookback' argument must be at least 1");
63+
averageLookback = 1;
64+
}
65+
66+
if (budgetLookback < 1)
67+
{
68+
Log::Error("AdaptiveSampler: 'budgetLookback' argument must be at least 1");
69+
budgetLookback = 1;
70+
}
71+
72+
_samplesPerWindow = samplesPerWindow;
73+
_budgetLookback = budgetLookback;
74+
75+
_samplesBudget = samplesPerWindow + (static_cast<int64_t>(budgetLookback) * samplesPerWindow);
76+
_emaAlpha = ComputeIntervalAlpha(averageLookback);
77+
_budgetAlpha = ComputeIntervalAlpha(budgetLookback);
78+
79+
_countsRef = &_countsSlots[0];
80+
81+
// Initialize RNG
82+
std::random_device rd;
83+
_rng = std::mt19937(rd());
84+
_distribution = std::uniform_real_distribution<>(0.0, 1.0);
85+
86+
if (windowDuration != std::chrono::milliseconds::zero())
87+
{
88+
_timer.Start();
89+
}
90+
}
91+
92+
bool AdaptiveSampler::Sample()
93+
{
94+
auto* counts = _countsRef.load();
95+
counts->AddTest();
96+
97+
if (NextDouble() < _probability)
98+
{
99+
return counts->AddSample(_samplesBudget);
100+
}
101+
102+
return false;
103+
}
104+
105+
bool AdaptiveSampler::Keep()
106+
{
107+
auto* counts = _countsRef.load();
108+
counts->AddTest();
109+
counts->AddSample();
110+
return true;
111+
}
112+
113+
bool AdaptiveSampler::Drop()
114+
{
115+
auto* counts = _countsRef.load();
116+
counts->AddTest();
117+
return false;
118+
}
119+
120+
double AdaptiveSampler::NextDouble()
121+
{
122+
std::lock_guard lock(_rngMutex);
123+
return _distribution(_rng);
124+
}
125+
126+
// Returns 1 - lookback^(-1 / lookback).
// For lookback == 1 the result is 0.
double AdaptiveSampler::ComputeIntervalAlpha(int32_t lookback)
{
    const double exponent = -1.0 / lookback;
    const double decay = pow(lookback, exponent);

    return 1 - decay;
}
130+
131+
int64_t AdaptiveSampler::CalculateBudgetEma(int64_t sampledCount)
132+
{
133+
_avgSamples = std::isnan(_avgSamples) || _budgetAlpha <= 0.0
134+
? sampledCount
135+
: _avgSamples + _budgetAlpha * (sampledCount - _avgSamples);
136+
137+
return llround(std::max(_samplesPerWindow - _avgSamples, 0.0) * _budgetLookback);
138+
}
139+
140+
void AdaptiveSampler::RollWindow()
141+
{
142+
auto& counts = _countsSlots[_countsSlotIndex];
143+
144+
/*
145+
* Semi-atomically replace the Counts instance such that sample requests during window maintenance will be
146+
* using the newly created counts instead of the ones currently processed by the maintenance routine.
147+
* We are ok with slightly racy outcome where totaCount and sampledCount may not be totally in sync
148+
* because it allows to avoid contention in the hot-path and the effect on the overall sample rate is minimal
149+
* and will get compensated in the long run.
150+
* Theoretically, a compensating system might be devised but it will always require introducing a single point
151+
* of contention and add a fair amount of complexity. Considering that we are ok with keeping the target sampling
152+
* rate within certain error margins and this data race is not breaking the margin it is better to keep the
153+
* code simple and reasonably fast.
154+
*/
155+
156+
_countsSlotIndex = (_countsSlotIndex + 1) % 2;
157+
_countsRef = &_countsSlots[_countsSlotIndex];
158+
const auto totalCount = counts.TestCount();
159+
const auto sampledCount = counts.SampleCount();
160+
161+
_samplesBudget = CalculateBudgetEma(sampledCount);
162+
163+
if (_totalCountRunningAverage == 0 || _emaAlpha <= 0.0)
164+
{
165+
_totalCountRunningAverage = totalCount;
166+
}
167+
else
168+
{
169+
_totalCountRunningAverage = _totalCountRunningAverage + _emaAlpha * (totalCount - _totalCountRunningAverage);
170+
}
171+
172+
if (_totalCountRunningAverage <= 0)
173+
{
174+
_probability = 1;
175+
}
176+
else
177+
{
178+
_probability = std::min(_samplesBudget / _totalCountRunningAverage, 1.0);
179+
}
180+
181+
counts.Reset();
182+
183+
if (_rollWindowCallback != nullptr)
184+
{
185+
_rollWindowCallback();
186+
}
187+
}
188+
189+
// Returns a State built from the current window's test and sample counts, followed by
// _samplesBudget, _probability and _totalCountRunningAverage. The values are read one
// after another without locking.
AdaptiveSampler::State AdaptiveSampler::GetInternalState()
{
    auto* window = _countsRef.load();

    const auto tests = window->TestCount();
    const auto samples = window->SampleCount();

    return State{
        tests,
        samples,
        _samplesBudget,
        _probability,
        _totalCountRunningAverage};
}

0 commit comments

Comments
 (0)