Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add munin plugin to read stats from collectd's RRD files
- Loading branch information
Showing
1 changed file
with
244 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
#!/bin/sh | ||
: << =cut | ||
=head1 NAME | ||
radsniff - A plugin to consume statistics generated by radsniff via collectd RRD files | ||
=head1 APPLICABLE SYSTEMS | ||
radsniff 3.1.x or later | ||
=head1 CONFIGURATION | ||
This plugin uses the following configuration variables: | ||
[radsniff] | ||
env.host - The host collectd thinks the radsniff data | ||
originated from (defaults to current host). | ||
env.rrd_path - Path to the directory containing rrd files. | ||
env.type - Either radius_rtx, radius_latency or radius_count | ||
env.pkt_type - The type of packet to graph. | ||
=head1 AUTHOR | ||
Copyright (C) 2014 Arran Cudbard-Bell <a.cudbardb@freeradius.org> | ||
=head1 LICENSE | ||
This program is free software; you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation; either version 2 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program; if not, write to the Free Software | ||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA | ||
=head1 MAGIC MARKERS | ||
#%# family=manual | ||
=cut | ||
|
||
if [ -z "$type" ]; then | ||
echo "env.type must be set" >&2 | ||
exit -1 | ||
fi | ||
|
||
if [ -z "$pkt_type" ]; then | ||
echo "env.pkt_type must be set" >&2 | ||
exit -1 | ||
fi | ||
|
||
if [ "$1" = "config" ]; then | ||
pretty_pkt_type=`echo "$pkt_type" | sed -e 's/_/ /g' | sed 's/^./\U&\E/'` | ||
|
||
case "$type" in | ||
radius_rtx) | ||
echo "graph_title ${pretty_pkt_type} rtx" | ||
echo 'graph_args --base 1000 -l 0 ' | ||
echo 'graph_period second' | ||
echo 'graph_vlabel Exchanged / ${graph_period}' | ||
echo 'graph_category RADIUS' | ||
|
||
echo 'none.label no loss' | ||
echo 'none.info Responses received after first request' | ||
echo 'none.type GAUGE' | ||
echo 'none.min 0' | ||
|
||
echo 'one.label 1' | ||
echo 'one.info Responses received after one retransmission' | ||
echo 'one.type GAUGE' | ||
echo 'one.min 0' | ||
|
||
echo 'two.label 2' | ||
echo 'two.info Responses received after two retransmissions' | ||
echo 'two.type GAUGE' | ||
echo 'two.min 0' | ||
|
||
echo 'three.label 3' | ||
echo 'three.info Responses received after three retransmissions' | ||
echo 'three.type GAUGE' | ||
echo 'three.min 0' | ||
|
||
echo 'four.label 4' | ||
echo 'four.info Responses received after four retransmissions' | ||
echo 'four.type GAUGE' | ||
echo 'four.min 0' | ||
|
||
echo 'more.label more' | ||
echo 'more.info Responses received after more than four retransmissions' | ||
echo 'more.type GAUGE' | ||
echo 'more.min 0' | ||
|
||
echo 'lost.label lost' | ||
echo 'lost.info Requests to which no response was seen' | ||
echo 'lost.type GAUGE' | ||
echo 'lost.min 0' | ||
;; | ||
|
||
radius_latency) | ||
echo "graph_title ${pretty_pkt_type} latency" | ||
echo 'graph_args --base 1000 -l 0 ' | ||
echo 'graph_vlabel Latency (ms)' | ||
echo 'graph_category RADIUS' | ||
|
||
echo 'smoothed.label smoothed avg' | ||
echo 'smoothed.info Smoothed average' | ||
echo 'smoothed.type GAUGE' | ||
echo 'smoothed.min 0' | ||
|
||
echo 'avg.label avg' | ||
echo 'avg.info Average latency over the stats interval' | ||
echo 'avg.type GAUGE' | ||
echo 'avg.min 0' | ||
|
||
echo 'high.label high' | ||
echo 'high.info Highest latency over the stats interval' | ||
echo 'high.type GAUGE' | ||
echo 'high.min 0' | ||
|
||
echo 'low.label low' | ||
echo 'low.info Lowest latency over the stats interval' | ||
echo 'low.type GAUGE' | ||
echo 'low.min 0' | ||
;; | ||
|
||
radius_count) | ||
echo "graph_title $pretty_pkt_type counters" | ||
echo 'graph_args --base 1000 -l 0 ' | ||
echo 'graph_period second' | ||
echo 'graph_vlabel Packets / ${graph_period}' | ||
echo 'graph_category RADIUS' | ||
|
||
echo 'received.label received' | ||
echo 'received.info Packets of this type received' | ||
echo 'received.type GAUGE' | ||
echo 'received.min 0' | ||
|
||
echo 'linked.label linked' | ||
echo 'linked.info Packets linked to another request or response' | ||
echo 'linked.type GAUGE' | ||
echo 'linked.min 0' | ||
|
||
echo 'unlinked.label unlinked' | ||
echo 'unlinked.info Packets not linked to another request or response' | ||
echo 'unlinked.type GAUGE' | ||
echo 'unlinked.min 0' | ||
|
||
echo 'reused.label reused' | ||
echo 'reused.info Request which (prematurely) re-used the same ID as a previous request' | ||
echo 'reused.type GAUGE' | ||
echo 'reused.min 0' | ||
;; | ||
*) | ||
echo "env.type ($type) is invalid must be radius_rtx, radius_latency, or radius_count" >&2 | ||
exit -1 | ||
esac | ||
exit 0 | ||
fi | ||
|
||
HOST=${host:-`hostname -f`} | ||
RRD_PATH=${rrd_path:-"/var/lib/collectd/rrd/${HOST}/radsniff-exchanged"} | ||
RRD_PATH="${RRD_PATH}/${type}-${pkt_type}.rrd" | ||
RRD_RES=${rrd_res:-300} | ||
|
||
if [ ! -e "$RRD_PATH" ]; then | ||
echo "rrd file '$RRD_PATH' does not exist" >&2 | ||
exit -1 | ||
fi | ||
|
||
fetch_data() | ||
{ | ||
# RRD tool doesn't always select the correct period (seems | ||
# to round up and give us -nan results) in the interest of | ||
# gap free graphing, we attempt to get the last two periods | ||
# worth of data, and then use the newest non -nan one. | ||
# It's not perfect and should be fixed at some point... | ||
rrd_last=`rrdtool fetch "$RRD_PATH" $1 -r $RRD_RES -e $(expr $(date +%s) / $RRD_RES \* $RRD_RES) -s end-$(expr $RRD_RES \* 2)`; ret=$? | ||
if [ $ret -ne 0 ]; then | ||
echo "$rrd_last" >&2 | ||
exit $ret | ||
fi | ||
echo "$rrd_last" | head -1 | ||
echo "$rrd_last" | grep '^[0-9]*:' | grep -v -E '^[0-9]*:( -nan)*$' | tail -1 | ||
} | ||
|
||
fetch_data_column() | ||
{ | ||
echo "$(fetch_data $1)" | tail -1 | cut -d ' ' -f $(expr $2 + 2) | ||
} | ||
|
||
case "$type" in | ||
radius_rtx) | ||
col=2 | ||
rrd_data=$(fetch_data 'AVERAGE') | ||
for var in `echo "$rrd_data" | head -1`; do | ||
case "$var" in | ||
1) printf "one.value ";; | ||
2) printf "two.value ";; | ||
3) printf "three.value ";; | ||
4) printf "four.value ";; | ||
*) printf "$var.value ";; | ||
esac | ||
echo "$rrd_data" | tail -1 | cut -d ' ' -f $col | ||
col=`expr $col + 1` | ||
done | ||
;; | ||
|
||
radius_count) | ||
col=2 | ||
rrd_data=$(fetch_data 'AVERAGE') | ||
for var in `echo "$rrd_data" | head -1`; do | ||
printf "$var.value " | ||
echo "$rrd_data" | tail -1 | cut -d ' ' -f $col | ||
col=`expr $col + 1` | ||
done | ||
;; | ||
|
||
radius_latency) | ||
printf "smoothed.value " | ||
fetch_data_column 'AVERAGE' 0 | ||
|
||
printf "avg.value " | ||
fetch_data_column 'AVERAGE' 1 | ||
|
||
# Averages here are unacceptable because we use this to detect | ||
# abnormally long delays in responses, and if we average all the highs | ||
# over a five minute period, transient spikes in latency may be lost. | ||
printf "high.value " | ||
fetch_data_column 'MAX' 2 | ||
|
||
# Again we need the lowest value of the set here, as an abnormally | ||
# quick response may indicate something is wrong. | ||
printf "low.value " | ||
fetch_data_column 'MIN' 3 | ||
;; | ||
esac | ||
exit 0 |