-
Notifications
You must be signed in to change notification settings - Fork 1
/
check_cpu_stats.sh
executable file
·242 lines (231 loc) · 12.4 KB
/
check_cpu_stats.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#!/bin/bash
# ==============================================================================
# CPU Utilization Statistics plugin for Nagios
#
# Original author: Steve Bosek
# Creation date: 8 September 2007
# Description: Monitoring plugin (script) to check cpu utilization statistics.
# This script has been designed and written on Unix platforms
# requiring iostat as external program.
# The script is used to query 6 of the key cpu statistics
# (user,system,iowait,steal,nice,idle) at the same time.
# History/Changes: HISTORY moved out of plugin into Git repository / README.md
# License: GNU General Public License v3.0 (GPL3), see LICENSE in Git repository
#
# Copyright 2007-2009,2011 Steve Bosek
# Copyright 2008 Bas van der Doorn
# Copyright 2008 Philipp Lemke
# Copyright 2016 Philipp Dallig
# Copyright 2022-2023 Claudio Kuenzler
#
# Usage: ./check_cpu_stats.sh [-w <user,system,iowait>] [-c <user,system,iowait>] ( [-i <report interval>] [-n <report number> ] [-b <N,processname>])
#
# Example: ./check_cpu_stats.sh
# ./check_cpu_stats.sh -w 70,40,30 -c 90,60,40
# ./check_cpu_stats.sh -w 70,40,30 -c 90,60,40 -i 3 -n 5 -b '1,apache2' -b '1,running process'
# ========================================================================================
# -----------------------------------------------------------------------------------------
# Plugin description
# Name of this script as invoked, without its directory part; shown in the
# usage text. A parameter expansion is used instead of an unquoted
# `basename $0` so that an install path containing spaces is handled correctly.
PROGNAME=${0##*/}
RELEASE="Revision 3.1.5"
# Paths to commands used in this script. These may have to be modified to match your system setup.
# The standard binary directories are appended to whatever PATH was inherited.
export PATH="${PATH}:/usr/local/bin:/usr/bin:/bin"
IOSTAT="iostat"
# Needed for HP-UX, where sar is used in place of iostat
SAR="/usr/bin/sar"
# Nagios return codes
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
# Plugin default parameters value if not defined.
# `: "${VAR:=default}"` assigns the default only when VAR is unset or empty,
# so values exported by the caller are kept.
: "${LIST_WARNING_THRESHOLD:=70,40,30}"
: "${LIST_CRITICAL_THRESHOLD:=90,60,40}"
: "${INTERVAL_SEC:=1}"
: "${NUM_REPORT:=3}"
# -----------------------------------------------------------------------------------------
# Check required commands
# Verify that the external statistics command for this platform is available
# before doing anything else; exit UNKNOWN with an explanatory message if not.
# On HP-UX the sar binary at $SAR is required, everywhere else iostat.
if [ "$(uname)" = "HP-UX" ]; then
  # Quoted so that an empty $SAR fails the test instead of silently passing it.
  if [ ! -x "${SAR}" ]; then
    echo "UNKNOWN: sar not found or is not executable by the nagios user."
    exit "${STATE_UNKNOWN}"
  fi
else
  for cmd in iostat; do
    # Test the exit status of `command -v` directly. Wrapping it in a command
    # substitution would try to execute whatever the lookup printed.
    if ! command -v "${cmd}" >/dev/null 2>&1; then
      echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
      exit "${STATE_UNKNOWN}"
    fi
  done
fi
# -----------------------------------------------------------------------------------------
# Functions plugin usage
# Write the plugin revision string ($RELEASE) to stdout and terminate the
# script with the UNKNOWN return code.
print_release() {
  printf '%s\n' "$RELEASE"
  exit $STATE_UNKNOWN
}
# Write the usage summary (program name, release and one line per option) to
# stdout, then terminate the script with exit code 0.
# Reads the globals PROGNAME and RELEASE.
print_usage() {
  # One argument per output line; the strings are data, not the format,
  # so the literal % characters need no escaping.
  printf '%s\n' \
    "" \
    "$PROGNAME $RELEASE - Monitoring plugin to check CPU Utilization" \
    "" \
    "Usage: check_cpu_stats.sh [-w] [-c] [-i] [-n] [-b]+" \
    "" \
    " -w Warning threshold in % for warn_user,warn_system,warn_iowait CPU (default : 70,40,30)" \
    " -c Critical threshold in % for crit_user,crit_system,crit_iowait CPU (default : 90,60,40)" \
    " -i Interval in seconds for iostat (default : 1)" \
    " -n Number of reports for iostat (default : 3)" \
    " -b The plugin will exit OK when condition matches (number of CPUs and process running), expects an input of N,process (e.g. 4,apache2). Can be used multiple times: -b 1,puppet -b 4,apache2 -b 4,containerd. Works only under Linux." \
    " -v Show version" \
    " -h Show this page" \
    "" \
    "Usage: $PROGNAME" \
    "Usage: $PROGNAME --help" \
    ""
  exit 0
}
# Write the usage summary followed by a short description of the plugin to
# stdout, then terminate the script with exit code 0.
print_help() {
  # print_usage ends with `exit`, which would terminate the script before the
  # description below is printed. Running it in a subshell confines that exit
  # to the subshell while its output still goes to our stdout.
  ( print_usage )
  echo ""
  echo "This plugin will check cpu utilization (user,system,iowait,idle in %)"
  echo ""
  exit 0
}
# -----------------------------------------------------------------------------------------
# Parse parameters
#######################################
# Parse the plugin's command line.
# Globals written:
#   LIST_WARNING_THRESHOLD   (-w)
#   LIST_CRITICAL_THRESHOLD  (-c)
#   INTERVAL_SEC             (-i)
#   NUM_REPORT               (-n)
#   BAIL                     (-b, array; one element per occurrence)
# Arguments: the script's positional parameters.
# Exits:     via print_help for --help / -h, via print_release for -v,
#            and with STATE_UNKNOWN after showing the help text when an
#            option is invalid or lacks its argument.
#######################################
parse_cli_options() {
  # Local getopts state so the function can be called more than once.
  local opt OPTARG OPTIND=1
  # Start from an empty list: a BAIL variable inherited from the environment
  # must not turn into a bailout pattern.
  BAIL=()
  if [ "${1}" = "--help" ]; then
    print_help
    exit $STATE_UNKNOWN
  fi
  while getopts "c:w:i:n:b:hv" opt; do
    case "${opt}" in
      w) LIST_WARNING_THRESHOLD=${OPTARG} ;;
      c) LIST_CRITICAL_THRESHOLD=${OPTARG} ;;
      i) INTERVAL_SEC=${OPTARG} ;;
      n) NUM_REPORT=${OPTARG} ;;
      b) BAIL+=("${OPTARG}") ;;
      h) print_help ;;
      v) print_release ;;
      *)
        # Invalid option or missing argument. print_help exits with 0, which
        # the monitoring system would read as OK; show it in a subshell and
        # report UNKNOWN instead.
        ( print_help )
        exit $STATE_UNKNOWN
        ;;
    esac
  done
}
parse_cli_options "$@"
# -----------------------------------------------------------------------------------------
# List to Table for warning threshold
# Turn the comma-separated warning list (user,system,iowait) into an array.
# Commas are replaced by blanks and the result is split by `read`, which
# unlike an unquoted command substitution does not glob-expand the input.
read -r -a TAB_WARNING_THRESHOLD <<< "${LIST_WARNING_THRESHOLD//,/ }"
if [ "${#TAB_WARNING_THRESHOLD[@]}" -ne 3 ]; then
  echo "ERROR : Bad count parameter in Warning Threshold"
  exit "${STATE_WARNING}"
fi
# List to Table for critical threshold
read -r -a TAB_CRITICAL_THRESHOLD <<< "${LIST_CRITICAL_THRESHOLD//,/ }"
if [ "${#TAB_CRITICAL_THRESHOLD[@]}" -ne 3 ]; then
  echo "ERROR : Bad count parameter in CRITICAL Threshold"
  exit "${STATE_WARNING}"
fi
# Every threshold must be a plain non-negative integer: the comparisons below
# and the final state evaluation use integer tests, which only print an error
# and evaluate to false on anything else, silently disabling the check.
# The pre-existing argument errors keep their historical WARNING exit code;
# this new rejection uses UNKNOWN, the state meant for invalid arguments.
for threshold_value in "${TAB_WARNING_THRESHOLD[@]}" "${TAB_CRITICAL_THRESHOLD[@]}"; do
  case "${threshold_value}" in
    '' | *[!0-9]*)
      echo "ERROR : Threshold '${threshold_value}' is not a non-negative integer"
      exit "${STATE_UNKNOWN}"
      ;;
  esac
done
USER_WARNING_THRESHOLD=${TAB_WARNING_THRESHOLD[0]}
SYSTEM_WARNING_THRESHOLD=${TAB_WARNING_THRESHOLD[1]}
IOWAIT_WARNING_THRESHOLD=${TAB_WARNING_THRESHOLD[2]}
USER_CRITICAL_THRESHOLD=${TAB_CRITICAL_THRESHOLD[0]}
SYSTEM_CRITICAL_THRESHOLD=${TAB_CRITICAL_THRESHOLD[1]}
IOWAIT_CRITICAL_THRESHOLD=${TAB_CRITICAL_THRESHOLD[2]}
# Each warning threshold has to be strictly below its critical counterpart.
# `[ ... ]` is used on purpose: it compares in decimal, whereas `[[ ... ]]`
# would treat a value such as 08 as an invalid octal number.
if [ "${USER_WARNING_THRESHOLD}" -ge "${USER_CRITICAL_THRESHOLD}" ] ||
  [ "${SYSTEM_WARNING_THRESHOLD}" -ge "${SYSTEM_CRITICAL_THRESHOLD}" ] ||
  [ "${IOWAIT_WARNING_THRESHOLD}" -ge "${IOWAIT_CRITICAL_THRESHOLD}" ]; then
  echo "ERROR : Critical CPU Threshold lower as Warning CPU Threshold "
  exit "${STATE_WARNING}"
fi
# -----------------------------------------------------------------------------------------
# CPU utilization statistics per Unix platform (Linux, AIX, Solaris and HP-UX are supported)
# Collect the CPU figures for the current platform and build NAGIOS_DATA
# (human-readable text, then " | ", then the performance data).
# Reads:  INTERVAL_SEC, NUM_REPORT, TAB_WARNING_THRESHOLD, TAB_CRITICAL_THRESHOLD,
#         BAIL (Linux only), SAR (HP-UX only).
# Writes: CPU_REPORT, CPU_USER, CPU_SYSTEM, CPU_IOWAIT, CPU_IDLE, NAGIOS_DATA
#         and, on Linux, CPU_NICE, CPU_STEAL (when reported), CPU_REPORT_SECTIONS.
case "$(uname)" in
  Linux)
    # Last non-empty line of the iostat CPU report, with a decimal comma
    # turned into a dot and every run of blanks squeezed into one ';'.
    CPU_REPORT=$(iostat -c "${INTERVAL_SEC}" "${NUM_REPORT}" | sed -e 's/,/./g' | tr -s ' ' ';' | sed '/^$/d' | tail -1)
    # Number of ';' separators in the line, used to tell whether a steal
    # column is present.
    cpu_separators=${CPU_REPORT//[!;]/}
    CPU_REPORT_SECTIONS=${#cpu_separators}
    # The line starts with a separator, so field 1 is empty and is skipped.
    IFS=';' read -r _ CPU_USER CPU_NICE CPU_SYSTEM CPU_IOWAIT cpu_field6 cpu_field7 _ <<< "${CPU_REPORT}"
    if [ "${CPU_REPORT_SECTIONS}" -ge 6 ]; then
      CPU_STEAL=${cpu_field6}
      CPU_IDLE=${cpu_field7}
      NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=${CPU_NICE}%, steal=${CPU_STEAL}% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=${CPU_NICE}%;0;0;0; CpuSteal=${CPU_STEAL}%;0;0;0;"
    else
      # No steal column: field 6 is idle and steal is reported as zero.
      CPU_IDLE=${cpu_field6}
      NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=${CPU_NICE}%, steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=${CPU_NICE}%;0;0;0; CpuSteal=0.0%;0;0;0;"
    fi
    # Bail out possible under certain situations: each -b entry is
    # "N,process"; if the machine has exactly N CPUs and a process matching
    # "process" is running, report OK regardless of the measured values.
    if [[ ${#BAIL[@]} -gt 0 ]]; then
      BC_CPU=$(nproc)
      for o in "${!BAIL[@]}"; do
        # Text before the first comma is the CPU count, text between the
        # first and second comma is the process pattern.
        BAIL_CPU[o]=${BAIL[o]%%,*}
        bail_rest=""
        if [[ "${BAIL[o]}" == *,* ]]; then
          bail_rest=${BAIL[o]#*,}
        fi
        BAIL_PROCESS[o]=${bail_rest%%,*}
        # Skip malformed entries. An empty pattern would match every process
        # and a non-numeric CPU count can never equal the real one.
        if ! [[ "${BAIL_CPU[o]}" =~ ^[0-9]+$ ]] || [[ -z "${BAIL_PROCESS[o]}" ]]; then
          continue
        fi
        [ "${BAIL_CPU[o]}" -eq "${BC_CPU}" ] || continue
        # Count matching processes instead of collecting their PIDs: a list
        # of several PIDs is not a valid integer operand, so the comparison
        # errored out whenever more than one process matched. The ps header
        # line, the grep itself and this plugin are excluded from the count.
        BC_PROCESS_COUNT=$(ps aux | sed 1d | grep -- "${BAIL_PROCESS[o]}" | grep -Ecv '(grep|check_cpu_stats)')
        if [ "${BC_PROCESS_COUNT:-0}" -gt 0 ]; then
          echo "CPU STATISTICS OK - bailing out because of matched bailout patterns - ${NAGIOS_DATA}"
          exit "${STATE_OK}"
        fi
      done
    fi
    ;;
  AIX)
    CPU_REPORT=$(iostat -t "${INTERVAL_SEC}" "${NUM_REPORT}" | sed -e 's/,/./g' | tr -s ' ' ';' | tail -1)
    # Fields 4-7 of the ';'-separated line: user, system, idle, iowait.
    IFS=';' read -r _ _ _ CPU_USER CPU_SYSTEM CPU_IDLE CPU_IOWAIT _ <<< "${CPU_REPORT}"
    NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=0.00%, steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
    ;;
  SunOS)
    CPU_REPORT=$(iostat -c "${INTERVAL_SEC}" "${NUM_REPORT}" | tail -1)
    # Whitespace-separated columns 1-4: user, system, iowait, idle.
    read -r CPU_USER CPU_SYSTEM CPU_IOWAIT CPU_IDLE _ <<< "${CPU_REPORT}"
    NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=0.00%, steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
    ;;
  HP-UX)
    CPU_REPORT=$("${SAR}" "${INTERVAL_SEC}" "${NUM_REPORT}" | grep Average)
    # Column 1 is the "Average" label; columns 2-5: user, system, iowait, idle.
    read -r _ CPU_USER CPU_SYSTEM CPU_IOWAIT CPU_IDLE _ <<< "${CPU_REPORT}"
    # CpuIowait carries the % unit like every other value and platform; it
    # was missing here, leaving that one perfdata item without a unit.
    NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% nice=0.00% steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
    ;;
  # MacOS X test
  # Darwin ) CPU_REPORT=`iostat -w $INTERVAL_SEC -c $NUM_REPORT | tail -1`
  # CPU_USER=`echo $CPU_REPORT | awk '{ print $4 }'`
  # CPU_SYSTEM=`echo $CPU_REPORT | awk '{ print $5 }'`
  # CPU_IDLE=`echo $CPU_REPORT | awk '{ print $6 }'`
  # NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=0.00% idle=${CPU_IDLE}% nice=0.00% steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=0.0%;0;0;0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
  # ;;
  *)
    echo "UNKNOWN: $(uname) not yet supported by this plugin. Coming soon !"
    exit "${STATE_UNKNOWN}"
    ;;
esac
# -----------------------------------------------------------------------------------------
# Add for integer shell issue
# The shell's integer tests cannot compare decimal numbers, so keep only the
# part of each value in front of the first "." for the threshold checks.
CPU_USER_MAJOR=${CPU_USER%%.*}
CPU_SYSTEM_MAJOR=${CPU_SYSTEM%%.*}
CPU_IOWAIT_MAJOR=${CPU_IOWAIT%%.*}
CPU_IDLE_MAJOR=${CPU_IDLE%%.*}
# -----------------------------------------------------------------------------------------
# Return
# Compare the integer parts of user, system and iowait with their thresholds,
# print the status line with NAGIOS_DATA and exit with the matching code.
#
# All operands must be plain integers. If the statistics command failed or its
# output had an unexpected layout, the values are empty or non-numeric; every
# integer test then merely prints an error and evaluates to false, so the
# result would be reported as OK. Report UNKNOWN in that case instead.
for checked_value in \
  "${CPU_USER_MAJOR}" "${CPU_SYSTEM_MAJOR}" "${CPU_IOWAIT_MAJOR}" \
  "${USER_WARNING_THRESHOLD}" "${SYSTEM_WARNING_THRESHOLD}" "${IOWAIT_WARNING_THRESHOLD}" \
  "${USER_CRITICAL_THRESHOLD}" "${SYSTEM_CRITICAL_THRESHOLD}" "${IOWAIT_CRITICAL_THRESHOLD}"; do
  case "${checked_value}" in
    '' | *[!0-9]*)
      echo "CPU STATISTICS UNKNOWN: could not determine CPU utilization or thresholds (non-integer value '${checked_value}')"
      exit "${STATE_UNKNOWN}"
      ;;
  esac
done
# Critical is evaluated first, so the warning branch is only reached when no
# value is at or above its critical threshold.
if [ "${CPU_USER_MAJOR}" -ge "${USER_CRITICAL_THRESHOLD}" ] ||
  [ "${CPU_SYSTEM_MAJOR}" -ge "${SYSTEM_CRITICAL_THRESHOLD}" ] ||
  [ "${CPU_IOWAIT_MAJOR}" -ge "${IOWAIT_CRITICAL_THRESHOLD}" ]; then
  echo "CPU STATISTICS CRITICAL : ${NAGIOS_DATA}"
  exit "${STATE_CRITICAL}"
elif [ "${CPU_USER_MAJOR}" -ge "${USER_WARNING_THRESHOLD}" ] ||
  [ "${CPU_SYSTEM_MAJOR}" -ge "${SYSTEM_WARNING_THRESHOLD}" ] ||
  [ "${CPU_IOWAIT_MAJOR}" -ge "${IOWAIT_WARNING_THRESHOLD}" ]; then
  echo "CPU STATISTICS WARNING : ${NAGIOS_DATA}"
  exit "${STATE_WARNING}"
else
  echo "CPU STATISTICS OK : ${NAGIOS_DATA}"
  exit "${STATE_OK}"
fi
# Safety net only: every branch above exits.
echo "CPU STATISTICS UNKNOWN: Should never reach this."
exit "${STATE_UNKNOWN}"