Skip to content
Browse files

Add plugin: check_puppet.rb

  • Loading branch information...
1 parent e8e50b0 commit 64622dcbfa046d4f0a43e1e2a5b32f4c5fb99368 @arioch arioch committed
Showing with 155 additions and 5 deletions.
  1. +5 −0 README.md
  2. +6 −5 build.txt
  3. +144 −0 check_puppet.rb
View
5 README.md
@@ -65,6 +65,11 @@ An example Vagrant project has been included to get you started right away.
<td><a href="http://exchange.nagios.org/directory/Plugins/Web-Servers/Apache/Robert-Becht/details">upstream</a></td>
</tr>
<tr>
+ <td>check_puppet.rb</td>
+ <td><a href="http://www.devco.net/">R.I. Pienaar</a></td>
+ <td><a href="https://github.com/ripienaar/monitoring-scripts/blob/master/puppet/check_puppet.rb">upstream</a></td>
+ </tr>
+ <tr>
<td>...</td>
<td>...</td>
<td><a href="http://google.com">upstream</a></td>
View
11 build.txt
@@ -1,5 +1,6 @@
-# Nagios plugin version iteration
-check_bacula 0.0.4 1
-check_mem.pl 1.8 1
-check_mysqld.pl 0.93 1
-check_apache-auto.pl 1.0 1
+# Nagios plugin version iteration
+check_apache-auto.pl 1.0 1
+check_bacula 0.0.4 1
+check_mem.pl 1.8 1
+check_mysqld.pl 0.93 1
+check_puppet.rb 70ba5a5155 1
View
144 check_puppet.rb
@@ -0,0 +1,144 @@
#!/usr/bin/ruby

# A simple Nagios check that should be run as root,
# perhaps under the mcollective NRPE plugin. It can
# check when the last Puppet run was done, or (with
# --check-failures) how many resources failed during
# the last run, and it can skip machines on which
# Puppet is not enabled.
#
# The script uses the Puppet last_run_summary.yaml
# file to determine when Puppet last ran, falling back
# to the age of the state file.
#
# Exit codes used below: 0 for OK, 1 for WARNING,
# 2 for CRITICAL and 3 when the thresholds are missing.

require 'optparse'
require 'yaml'

lockfile = "/var/lib/puppet/state/puppetdlock"
statefile = "/var/lib/puppet/state/state.yaml"
summaryfile = "/var/lib/puppet/state/last_run_summary.yaml"
enabled = true
lastrun = 0
failcount = 0
warn = 0
crit = 0
enabled_only = false
failures = false

opt = OptionParser.new

# The threshold arguments are optional for the parser; a missing value arrives
# as nil, which to_i turns into 0 and the sanity check below then rejects.
opt.on("--critical [CRIT]", "-c", Integer, "Critical threshold, time or failed resources") do |f|
  crit = f.to_i
end

opt.on("--warn [WARN]", "-w", Integer, "Warning threshold, time or failed resources") do |f|
  warn = f.to_i
end

opt.on("--check-failures", "-f", "Check for failed resources instead of time since run") do |_f|
  failures = true
end

opt.on("--only-enabled", "-e", "Only alert if Puppet is enabled") do |_f|
  enabled_only = true
end

# The file arguments are optional too; keep the default path when the flag is
# given without a value instead of replacing it with nil.
opt.on("--lock-file [FILE]", "-l", "Location of the lock file, default #{lockfile}") do |f|
  lockfile = f if f
end

opt.on("--state-file [FILE]", "-t", "Location of the state file, default #{statefile}") do |f|
  statefile = f if f
end

opt.on("--summary-file [FILE]", "-s", "Location of the summary file, default #{summaryfile}") do |f|
  summaryfile = f if f
end

opt.parse!

# A threshold of 0 is indistinguishable from "not given", so both must be set.
if warn == 0 || crit == 0
  puts "Please specify a warning and critical level"
  exit 3
end

# An empty lock file marks Puppet as disabled; a non-empty one is left by a run
# in progress and does not change the enabled state. File.zero? is false for a
# missing file, so a lock that vanishes while we look at it cannot raise.
enabled = false if File.zero?(lockfile)

# Baseline: the modification time of the state file.
lastrun = File.mtime(statefile).to_i if File.exist?(statefile)

if File.exist?(summaryfile)
  begin
    summary = YAML.load_file(summaryfile)

    # Prefer the timestamp recorded in the summary, but only when it is usable;
    # otherwise keep the state file mtime gathered above.
    summary_lastrun = summary["time"]["last_run"]
    lastrun = summary_lastrun if summary_lastrun.is_a?(Numeric)

    if summary.include?("events")
      # unless there are failures, the events hash just wont have the failure count
      failcount = summary["events"]["failure"] || 0
    else
      # machines that outright failed to run like on missing dependencies
      # are treated as huge failures. The yaml file will be valid but
      # it wont have anything but last_run in it
      failcount = 99
    end
  rescue StandardError
    # Best effort: an unreadable or malformed summary counts as no failures.
    failcount = 0
  end
end

time_since_last_run = Time.now.to_i - lastrun

# With --only-enabled a disabled agent never alerts, whichever mode is active.
if enabled_only && !enabled
  puts "OK: Puppet is currently disabled, not alerting. Last run #{time_since_last_run} seconds ago with #{failcount} failures"
  exit 0
end

# Pick the measured value and the matching message for the selected mode.
if failures
  measured = failcount
  describe = lambda { |limit| "Puppet last ran had #{failcount} failures, expected < #{limit}" }
else
  measured = time_since_last_run
  describe = lambda { |limit| "Puppet last ran #{time_since_last_run} seconds ago, expected < #{limit}" }
end

if measured >= crit
  puts "CRITICAL: #{describe.call(crit)}"
  exit 2
elsif measured >= warn
  puts "WARNING: #{describe.call(warn)}"
  exit 1
end

state = enabled ? "enabled" : "disabled"
puts "OK: Puppet is currently #{state}, last run #{time_since_last_run} seconds ago with #{failcount} failures"
exit 0

0 comments on commit 64622dc

Please sign in to comment.
Something went wrong with that request. Please try again.