Skip to content

Commit

Permalink
Add munin plugin to read stats from collectd's RRD files
Browse files Browse the repository at this point in the history
  • Loading branch information
arr2036 committed Jan 1, 2014
1 parent 65237bd commit 49bab0a
Showing 1 changed file with 244 additions and 0 deletions.
244 changes: 244 additions & 0 deletions scripts/munin/radsniff
@@ -0,0 +1,244 @@
#!/bin/sh
: << =cut
=head1 NAME
radsniff - A plugin to consume statistics generated by radsniff via collectd RRD files
=head1 APPLICABLE SYSTEMS
radsniff 3.1.x or later
=head1 CONFIGURATION
This plugin uses the following configuration variables:
[radsniff]
env.host - The host collectd thinks the radsniff data
originated from (defaults to current host).
env.rrd_path - Path to the directory containing rrd files.
env.type - Either radius_rtx, radius_latency or radius_count
env.pkt_type - The type of packet to graph.
=head1 AUTHOR
Copyright (C) 2014 Arran Cudbard-Bell <a.cudbardb@freeradius.org>
=head1 LICENSE
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
=head1 MAGIC MARKERS
#%# family=manual
=cut

if [ -z "$type" ]; then
echo "env.type must be set" >&2
exit -1
fi

if [ -z "$pkt_type" ]; then
echo "env.pkt_type must be set" >&2
exit -1
fi

if [ "$1" = "config" ]; then
pretty_pkt_type=`echo "$pkt_type" | sed -e 's/_/ /g' | sed 's/^./\U&\E/'`

case "$type" in
radius_rtx)
echo "graph_title ${pretty_pkt_type} rtx"
echo 'graph_args --base 1000 -l 0 '
echo 'graph_period second'
echo 'graph_vlabel Exchanged / ${graph_period}'
echo 'graph_category RADIUS'

echo 'none.label no loss'
echo 'none.info Responses received after first request'
echo 'none.type GAUGE'
echo 'none.min 0'

echo 'one.label 1'
echo 'one.info Responses received after one retransmission'
echo 'one.type GAUGE'
echo 'one.min 0'

echo 'two.label 2'
echo 'two.info Responses received after two retransmissions'
echo 'two.type GAUGE'
echo 'two.min 0'

echo 'three.label 3'
echo 'three.info Responses received after three retransmissions'
echo 'three.type GAUGE'
echo 'three.min 0'

echo 'four.label 4'
echo 'four.info Responses received after four retransmissions'
echo 'four.type GAUGE'
echo 'four.min 0'

echo 'more.label more'
echo 'more.info Responses received after more than four retransmissions'
echo 'more.type GAUGE'
echo 'more.min 0'

echo 'lost.label lost'
echo 'lost.info Requests to which no response was seen'
echo 'lost.type GAUGE'
echo 'lost.min 0'
;;

radius_latency)
echo "graph_title ${pretty_pkt_type} latency"
echo 'graph_args --base 1000 -l 0 '
echo 'graph_vlabel Latency (ms)'
echo 'graph_category RADIUS'

echo 'smoothed.label smoothed avg'
echo 'smoothed.info Smoothed average'
echo 'smoothed.type GAUGE'
echo 'smoothed.min 0'

echo 'avg.label avg'
echo 'avg.info Average latency over the stats interval'
echo 'avg.type GAUGE'
echo 'avg.min 0'

echo 'high.label high'
echo 'high.info Highest latency over the stats interval'
echo 'high.type GAUGE'
echo 'high.min 0'

echo 'low.label low'
echo 'low.info Lowest latency over the stats interval'
echo 'low.type GAUGE'
echo 'low.min 0'
;;

radius_count)
echo "graph_title $pretty_pkt_type counters"
echo 'graph_args --base 1000 -l 0 '
echo 'graph_period second'
echo 'graph_vlabel Packets / ${graph_period}'
echo 'graph_category RADIUS'

echo 'received.label received'
echo 'received.info Packets of this type received'
echo 'received.type GAUGE'
echo 'received.min 0'

echo 'linked.label linked'
echo 'linked.info Packets linked to another request or response'
echo 'linked.type GAUGE'
echo 'linked.min 0'

echo 'unlinked.label unlinked'
echo 'unlinked.info Packets not linked to another request or response'
echo 'unlinked.type GAUGE'
echo 'unlinked.min 0'

echo 'reused.label reused'
echo 'reused.info Request which (prematurely) re-used the same ID as a previous request'
echo 'reused.type GAUGE'
echo 'reused.min 0'
;;
*)
echo "env.type ($type) is invalid must be radius_rtx, radius_latency, or radius_count" >&2
exit -1
esac
exit 0
fi

HOST=${host:-`hostname -f`}
RRD_PATH=${rrd_path:-"/var/lib/collectd/rrd/${HOST}/radsniff-exchanged"}
RRD_PATH="${RRD_PATH}/${type}-${pkt_type}.rrd"
RRD_RES=${rrd_res:-300}

if [ ! -e "$RRD_PATH" ]; then
echo "rrd file '$RRD_PATH' does not exist" >&2
exit -1
fi

fetch_data()
{
# RRD tool doesn't always select the correct period (seems
# to round up and give us -nan results) in the interest of
# gap free graphing, we attempt to get the last two periods
# worth of data, and then use the newest non -nan one.
# It's not perfect and should be fixed at some point...
rrd_last=`rrdtool fetch "$RRD_PATH" $1 -r $RRD_RES -e $(expr $(date +%s) / $RRD_RES \* $RRD_RES) -s end-$(expr $RRD_RES \* 2)`; ret=$?
if [ $ret -ne 0 ]; then
echo "$rrd_last" >&2
exit $ret
fi
echo "$rrd_last" | head -1
echo "$rrd_last" | grep '^[0-9]*:' | grep -v -E '^[0-9]*:( -nan)*$' | tail -1
}

fetch_data_column()
{
echo "$(fetch_data $1)" | tail -1 | cut -d ' ' -f $(expr $2 + 2)
}

case "$type" in
radius_rtx)
col=2
rrd_data=$(fetch_data 'AVERAGE')
for var in `echo "$rrd_data" | head -1`; do
case "$var" in
1) printf "one.value ";;
2) printf "two.value ";;
3) printf "three.value ";;
4) printf "four.value ";;
*) printf "$var.value ";;
esac
echo "$rrd_data" | tail -1 | cut -d ' ' -f $col
col=`expr $col + 1`
done
;;

radius_count)
col=2
rrd_data=$(fetch_data 'AVERAGE')
for var in `echo "$rrd_data" | head -1`; do
printf "$var.value "
echo "$rrd_data" | tail -1 | cut -d ' ' -f $col
col=`expr $col + 1`
done
;;

radius_latency)
printf "smoothed.value "
fetch_data_column 'AVERAGE' 0

printf "avg.value "
fetch_data_column 'AVERAGE' 1

# Averages here are unacceptable because we use this to detect
# abnormally long delays in responses, and if we average all the highs
# over a five minute period, transient spikes in latency may be lost.
printf "high.value "
fetch_data_column 'MAX' 2

# Again we need the lowest value of the set here, as an abnormally
# quick response may indicate something is wrong.
printf "low.value "
fetch_data_column 'MIN' 3
;;
esac
exit 0

0 comments on commit 49bab0a

Please sign in to comment.