Skip to content

Commit

Permalink
dnsdist: Add Prometheus latency histogram support
Browse files Browse the repository at this point in the history
  • Loading branch information
Marlinc committed May 28, 2019
1 parent 2153346 commit 0957b46
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 2 deletions.
14 changes: 14 additions & 0 deletions pdns/dnsdist-web.cc
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,20 @@ static void connectionThread(int sock, ComboAddress remote)
output << "\n";
}

// Latency histogram buckets.
// Prometheus histogram buckets are cumulative: every "le" (less-than-or-equal,
// here in milliseconds) series counts all queries answered within that bound,
// so the per-range counters are summed up as we walk the buckets.
// The series are emitted with the "dnsdist_" prefix so that they share a base
// name with the histogram's sum and count series.
// NOTE(review): assumes the "latency-sum"/"latency-count" entries are exported
// as dnsdist_latency_sum/dnsdist_latency_count -- confirm against the export loop.
uint64_t latency_amounts = g_stats.latency0_1;
output << "dnsdist_latency_bucket{le=\"1\"} " << latency_amounts << "\n";
latency_amounts += g_stats.latency1_10;
output << "dnsdist_latency_bucket{le=\"10\"} " << latency_amounts << "\n";
latency_amounts += g_stats.latency10_50;
output << "dnsdist_latency_bucket{le=\"50\"} " << latency_amounts << "\n";
latency_amounts += g_stats.latency50_100;
output << "dnsdist_latency_bucket{le=\"100\"} " << latency_amounts << "\n";
latency_amounts += g_stats.latency100_1000;
output << "dnsdist_latency_bucket{le=\"1000\"} " << latency_amounts << "\n";
latency_amounts += g_stats.latencySlow; // Should be the same as latency_count
output << "dnsdist_latency_bucket{le=\"+Inf\"} " << latency_amounts << "\n";

auto states = g_dstates.getLocal();
const string statesbase = "dnsdist_server_";

Expand Down
6 changes: 6 additions & 0 deletions pdns/dnsdist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ void doLatencyStats(double udiff)
else if(udiff < 100000) ++g_stats.latency50_100;
else if(udiff < 1000000) ++g_stats.latency100_1000;
else ++g_stats.latencySlow;
// Accumulate the total response time for the histogram's sum series,
// dividing udiff by 1000 (assumes udiff is in microseconds and the sum is in
// milliseconds, consistent with the bucket thresholds above -- TODO confirm).
// NOTE(review): latencySum is a std::atomic<uint64_t>, so the double result is
// converted to an integer on each addition; the fractional part is dropped and
// samples below 1000 add 0 to the sum.
g_stats.latencySum += udiff / 1000;

auto doAvg = [](double& var, double n, double weight) {
var = (weight -1) * var/weight + n/weight;
Expand Down Expand Up @@ -2792,3 +2793,8 @@ catch(PDNSException &ae)
errlog("Fatal pdns error: %s", ae.reason);
_exit(EXIT_FAILURE);
}

// Stat callback registered under "latency-count": reports how many queries
// have been counted as answered, i.e. the sum of the responses, selfAnswered
// and cacheHits counters. The string argument is part of the callback
// signature and is not used.
uint64_t getLatencyCount(const std::string&)
{
  const auto respondedOrSelfAnswered = g_stats.responses + g_stats.selfAnswered;
  return respondedOrSelfAnswered + g_stats.cacheHits;
}
12 changes: 10 additions & 2 deletions pdns/dnsdist.hh
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;

extern vector<pair<struct timeval, std::string> > g_confDelta;

extern uint64_t getLatencyCount(const std::string&);

struct DNSDistStats
{
using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
Expand All @@ -251,7 +253,7 @@ struct DNSDistStats
stat_t noPolicy{0};
stat_t cacheHits{0};
stat_t cacheMisses{0};
stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0};
stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
stat_t securityStatus{0};

double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
Expand Down Expand Up @@ -298,7 +300,10 @@ struct DNSDistStats
{"fd-usage", getOpenFileDescriptors},
{"dyn-blocked", &dynBlocked},
{"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
{"security-status", &securityStatus}
{"security-status", &securityStatus},
// Latency histogram
{"latency-sum", &latencySum},
{"latency-count", getLatencyCount},
};
};

Expand Down Expand Up @@ -390,6 +395,9 @@ struct MetricDefinitionStorage {
{ "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
{ "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
{ "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
// Latency histogram
{ "latency-sum", MetricDefinition(PrometheusMetricType::counter, "Total response time in milliseconds")},
{ "latency-count", MetricDefinition(PrometheusMetricType::counter, "Number of queries contributing to response time histogram")},
};
};

Expand Down
12 changes: 12 additions & 0 deletions pdns/dnsdistdist/docs/statistics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@ latency-slow
------------
Number of queries answered in more than 1 second.

latency-sum
------------
Total response time in milliseconds.

latency-count
-------------
Number of queries contributing to the response time histogram.

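latency-bucket
--------------
Cumulative number of queries contributing to the response time histogram, per latency bucket (answered in at most 1, 10, 50, 100 or 1000 ms, plus a final bucket with no upper bound).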

latency0-1
----------
Number of queries answered in less than 1 ms.
Expand Down

0 comments on commit 0957b46

Please sign in to comment.