public
Fork of mojombo/god
Description: Ruby process monitor
Homepage: http://god.rubyforge.org
Clone URL: git://github.com/topfunky/god.git
test http_response_code condition, add human readable condition logging
mojombo (author)
Thu Sep 20 18:12:09 -0700 2007
commit  6dcfaa880de4efdd196a99c0bbbb1e21c4685aa3
tree    879cda8af0cb19c6a4484b8f07f781124bef3a06
parent  889de6651b36d3eba10ac4afd6ea40eaa1aa84b5
...
6
7
8
 
9
10
11
...
17
18
19
 
20
21
22
...
6
7
8
9
10
11
12
...
18
19
20
21
22
23
24
0
@@ -6,6 +6,7 @@
0
   * Implement notification system
0
   * Add Tasks (a generalization of Watches) to do non-process related tasks
0
   * Add example init.d file in GOD_INSTALL_DIR/init/god
0
+ * Add human readable info to conditions (and make low level log lines debug)
0
 * Minor Enhancements
0
   * Allow EventConditions to do transition overloading
0
   * Report errors during god startup instead of failing silently
0
@@ -17,6 +18,7 @@
0
   * Add `god quit` to stop god without stopping any tasks
0
 * New Conditions
0
   * Flapping < TriggerCondition - trigger on state change
0
+ * HttpResponseCode < PollCondition - trigger on http response code or timeout (thx scott becker)
0
 * New Contacts
0
   * Email < Contact - notify via email (smtp, sendmail)
0
 * Bug Fixes
...
18
19
20
 
21
22
23
...
62
63
64
 
65
66
67
68
69
70
 
71
72
73
...
18
19
20
21
22
23
24
...
63
64
65
66
67
68
69
70
71
72
73
74
75
76
0
@@ -18,6 +18,7 @@ lib/god/conditions/always.rb
0
 lib/god/conditions/cpu_usage.rb
0
 lib/god/conditions/degrading_lambda.rb
0
 lib/god/conditions/flapping.rb
0
+lib/god/conditions/http_response_code.rb
0
 lib/god/conditions/lambda.rb
0
 lib/god/conditions/memory_usage.rb
0
 lib/god/conditions/process_exits.rb
0
@@ -62,12 +63,14 @@ test/configs/real.rb
0
 test/configs/running_load/running_load.god
0
 test/configs/stress/simple_server.rb
0
 test/configs/stress/stress.god
0
+test/configs/task/logs/.placeholder
0
 test/configs/task/task.god
0
 test/configs/test.rb
0
 test/helper.rb
0
 test/suite.rb
0
 test/test_behavior.rb
0
 test/test_condition.rb
0
+test/test_conditions_http_response_code.rb
0
 test/test_conditions_process_running.rb
0
 test/test_conditions_tries.rb
0
 test/test_contact.rb
...
73
74
75
 
76
77
 
78
79
80
...
125
126
127
 
128
129
130
...
73
74
75
76
77
 
78
79
80
81
...
126
127
128
129
130
131
132
0
@@ -73,8 +73,9 @@ module God
0
   VERSION = '0.5.0'
0
   
0
   LOG = Logger.new
0
+ LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
0
     
0
- LOG_BUFFER_SIZE_DEFAULT = 100
0
+ LOG_BUFFER_SIZE_DEFAULT = 1000
0
   PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
0
   DRB_PORT_DEFAULT = 17165
0
   DRB_ALLOW_DEFAULT = ['127.0.0.1']
0
@@ -125,6 +126,7 @@ module God
0
     self.pid_file_directory ||= PID_FILE_DIRECTORY_DEFAULT
0
     self.port ||= DRB_PORT_DEFAULT
0
     self.allow ||= DRB_ALLOW_DEFAULT
0
+ LOG.level = Logger::INFO
0
     
0
     # init has been executed
0
     self.inited = true
...
1
2
3
4
 
5
6
7
...
1
2
3
 
4
5
6
7
0
@@ -1,7 +1,7 @@
0
 module God
0
   
0
   class Condition < Behavior
0
- attr_accessor :transition, :notify
0
+ attr_accessor :transition, :notify, :info
0
     
0
     # Generate a Condition of the given kind. The proper class is found by camel casing the
0
     # kind (which is given as an underscored symbol).
...
4
5
6
 
 
 
 
7
8
9
...
4
5
6
7
8
9
10
11
12
13
0
@@ -4,6 +4,10 @@ module God
0
     class Always < PollCondition
0
       attr_accessor :what
0
       
0
+ def initialize
0
+ self.info = "always"
0
+ end
0
+
0
       def valid?
0
         valid = true
0
         valid &= complain("Attribute 'what' must be specified", self) if self.what.nil?
...
31
32
33
 
 
 
34
35
 
36
37
 
38
39
40
...
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
0
@@ -31,10 +31,15 @@ module God
0
         pid = File.read(self.watch.pid_file).strip
0
         process = System::Process.new(pid)
0
         @timeline.push(process.percent_cpu)
0
+
0
+ history = "[" + @timeline.map { |x| "#{x}%%" }.join(", ") + "]"
0
+
0
         if @timeline.select { |x| x > self.above }.size >= self.times.first
0
           @timeline.clear
0
+ self.info = "cpu out of bounds #{history}"
0
           return true
0
         else
0
+ self.info = "cpu within bounds #{history}"
0
           return false
0
         end
0
       end
...
4
5
6
 
 
 
 
7
8
9
...
4
5
6
7
8
9
10
11
12
13
0
@@ -4,6 +4,10 @@ module God
0
     class Flapping < TriggerCondition
0
       attr_accessor :times, :within, :from_state, :to_state, :retry_in, :retry_times, :retry_within
0
       
0
+ def initialize
0
+ self.info = "process is flapping"
0
+ end
0
+
0
       def prepare
0
         @timeline = Timeline.new(self.times)
0
         @retry_timeline = Timeline.new(self.retry_times)
...
4
5
6
7
8
 
 
 
 
 
 
 
 
9
10
11
12
13
14
15
16
 
 
17
18
19
...
21
22
23
24
 
25
26
27
...
32
33
34
35
 
36
37
38
...
41
42
43
44
 
 
45
46
 
47
48
49
 
50
51
52
 
53
54
55
56
57
58
59
 
 
 
 
 
 
 
 
 
 
 
 
60
61
62
...
4
5
6
 
 
7
8
9
10
11
12
13
14
15
16
17
18
19
20
 
 
21
22
23
24
25
...
27
28
29
 
30
31
32
33
...
38
39
40
 
41
42
43
44
...
47
48
49
 
50
51
52
 
53
54
55
 
56
57
58
 
59
60
61
62
63
64
 
 
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
0
@@ -4,16 +4,22 @@ module God
0
   module Conditions
0
     
0
     class HttpResponseCode < PollCondition
0
- attr_accessor :code_is, :code_is_not, :times, :host, :port, :timeout, :path
0
-
0
+ attr_accessor :code_is, # e.g. 500 or '500' or [404, 500] or %w{404 500}
0
+ :code_is_not, # e.g. 200 or '200' or [200, 302] or %w{200 302}
0
+ :times, # e.g. 3 or [3, 5]
0
+ :host, # e.g. www.example.com
0
+ :port, # e.g. 8080
0
+ :timeout, # e.g. 60.seconds
0
+ :path # e.g. '/'
0
+
0
       def initialize
0
         super
0
         self.times = [1, 1]
0
       end
0
       
0
       def prepare
0
- self.code_is = Array(self.code) if self.code_is
0
- self.code_is_not = Array(self.code_is_not) if self.code_is_not
0
+ self.code_is = Array(self.code_is).map { |x| x.to_i } if self.code_is
0
+ self.code_is_not = Array(self.code_is_not).map { |x| x.to_i } if self.code_is_not
0
         
0
         if self.times.kind_of?(Integer)
0
           self.times = [self.times, self.times]
0
@@ -21,7 +27,7 @@ module God
0
         
0
         @timeline = Timeline.new(self.times[1])
0
       end
0
-
0
+
0
       def valid?
0
         valid = true
0
         valid &= complain("Attribute 'host' must be specified", self) if self.host.nil?
0
@@ -32,7 +38,7 @@ module God
0
         valid &= complain("Attribute 'timeout' must be specified", self) if self.timeout.nil?
0
         valid
0
       end
0
-
0
+
0
       def test
0
         response = nil
0
         
0
@@ -41,22 +47,33 @@ module God
0
           response = http.head(self.path)
0
         end
0
         
0
- if self.code_is && self.code_is.include?(response.code)
0
+ actual_response_code = response.code.to_i
0
+ if self.code_is && self.code_is.include?(actual_response_code)
0
           pass
0
- elsif self.code_is_not && !self.code.include?(response.code)
0
+ elsif self.code_is_not && !self.code_is_not.include?(actual_response_code)
0
           pass
0
         else
0
- false
0
+ fail
0
         end
0
       rescue Timeout::Error
0
- self.code_is ? false : pass
0
+ self.code_is ? fail : pass
0
       end
0
       
0
       private
0
       
0
       def pass
0
- @timeline.clear
0
- return true
0
+ @timeline << true
0
+ if @timeline.select { |x| x }.size >= self.times.first
0
+ @timeline.clear
0
+ true
0
+ else
0
+ false
0
+ end
0
+ end
0
+
0
+ def fail
0
+ @timeline << false
0
+ false
0
       end
0
       
0
     end
...
11
12
13
14
 
 
 
 
 
 
 
15
16
17
...
11
12
13
 
14
15
16
17
18
19
20
21
22
23
0
@@ -11,7 +11,13 @@ module God
0
       end
0
 
0
       def test
0
- self.lambda.call()
0
+ if self.lambda.call()
0
+ self.info = "lambda condition was satisfied"
0
+ true
0
+ else
0
+ self.info = "lambda condition was not satisfied"
0
+ false
0
+ end
0
       end
0
     end
0
 
...
31
32
33
 
 
 
34
35
 
36
37
 
38
39
40
...
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
0
@@ -31,10 +31,15 @@ module God
0
         pid = File.read(self.watch.pid_file).strip
0
         process = System::Process.new(pid)
0
         @timeline.push(process.memory)
0
+
0
+ history = "[" + @timeline.map { |x| "#{x}kb" }.join(", ") + "]"
0
+
0
         if @timeline.select { |x| x > self.above }.size >= self.times.first
0
           @timeline.clear
0
+ self.info = "memory out of bounds #{history}"
0
           return true
0
         else
0
+ self.info = "memory within bounds #{history}"
0
           return false
0
         end
0
       end
...
2
3
4
 
 
 
 
5
6
7
...
2
3
4
5
6
7
8
9
10
11
0
@@ -2,6 +2,10 @@ module God
0
   module Conditions
0
     
0
     class ProcessExits < EventCondition
0
+ def initialize
0
+ self.info = "process exited"
0
+ end
0
+
0
       def valid?
0
         valid = true
0
         valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
...
12
13
14
 
 
15
16
17
18
19
 
20
21
22
23
24
25
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
28
29
...
12
13
14
15
16
17
 
 
 
 
18
19
20
21
22
23
24
 
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
0
@@ -12,18 +12,30 @@ module God
0
       end
0
     
0
       def test
0
+ self.info = []
0
+
0
         unless File.exist?(self.watch.pid_file)
0
- msg = "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
0
- Syslog.debug(msg)
0
- LOG.log(self.watch, :info, msg)
0
-
0
+ self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
0
           return !self.running
0
         end
0
         
0
         pid = File.read(self.watch.pid_file).strip
0
         active = System::Process.new(pid).exists?
0
         
0
- (self.running && active) || (!self.running && !active)
0
+ if (self.running && active)
0
+ self.info << "process is running"
0
+ true
0
+ elsif (!self.running && !active)
0
+ self.info << "process is not running"
0
+ true
0
+ else
0
+ if self.running
0
+ self.info << "process is not running"
0
+ else
0
+ self.info << "process is running"
0
+ end
0
+ false
0
+ end
0
       end
0
     end
0
     
...
20
21
22
 
 
23
24
 
25
26
 
27
28
29
...
20
21
22
23
24
25
26
27
28
29
30
31
32
33
0
@@ -20,10 +20,14 @@ module God
0
         concensus = (@timeline.size == self.times)
0
         duration = self.within.nil? || (@timeline.last - @timeline.first) < self.within
0
         
0
+ history = "[" + @timeline.map { |x| "#{x}" }.join(", ") + "]"
0
+
0
         if concensus && duration
0
           @timeline.clear if within.nil?
0
+ self.info = "tries exceeded #{history}"
0
           return true
0
         else
0
+ self.info = "tries within bounds #{history}"
0
           return false
0
         end
0
       end
...
26
27
28
29
 
30
31
32
 
 
 
 
33
34
35
...
26
27
28
 
29
30
31
32
33
34
35
36
37
38
39
0
@@ -26,10 +26,14 @@ module God
0
       true
0
     end
0
     
0
- def friendly_name
0
+ def base_name
0
       self.class.name.split('::').last
0
     end
0
     
0
+ def friendly_name
0
+ base_name
0
+ end
0
+
0
     def self.complain(text, c = nil)
0
       msg = text
0
       msg += " for #{c.friendly_name}" if c
...
55
56
57
58
59
60
 
61
62
63
...
113
114
115
116
117
118
 
 
119
120
121
...
142
143
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
146
147
 
 
148
149
150
 
 
151
152
153
...
55
56
57
 
 
 
58
59
60
61
...
111
112
113
 
 
 
114
115
116
117
118
...
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
 
 
175
176
177
 
 
178
179
180
181
182
0
@@ -55,9 +55,7 @@ module God
0
               result = condition.test
0
               
0
               # log
0
- msg = watch.name + ' ' + condition.class.name + " [#{result}] " + self.dest_desc(metric, condition)
0
- Syslog.debug(msg)
0
- LOG.log(watch, :info, msg)
0
+ self.log(watch, metric, condition, result)
0
               
0
               # notify
0
               if condition.notify
0
@@ -113,9 +111,8 @@ module God
0
           watch = metric.watch
0
           
0
           watch.mutex.synchronize do
0
- msg = watch.name + ' ' + condition.class.name + " [true] " + self.dest_desc(metric, condition)
0
- Syslog.debug(msg)
0
- LOG.log(watch, :info, msg)
0
+ # log
0
+ self.log(watch, metric, condition, true)
0
             
0
             # notify
0
             if condition.notify
0
@@ -142,12 +139,44 @@ module God
0
     
0
     # helpers
0
     
0
+ def self.log(watch, metric, condition, result)
0
+ # log info if available
0
+ if condition.info
0
+ begin
0
+ status =
0
+ if (metric.destination && metric.destination.keys.size == 2) || result == true
0
+ "[trigger]"
0
+ else
0
+ "[ok]"
0
+ end
0
+
0
+ Array(condition.info).each do |condition_info|
0
+ msg = "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
0
+ Syslog.debug(msg)
0
+ LOG.log(watch, :info, msg % [])
0
+ end
0
+ rescue Exception => e
0
+ puts e.message
0
+ puts e.backtrace.join("\n")
0
+ end
0
+ else
0
+ msg = "#{watch.name} [unknown] (#{condition.base_name})"
0
+ Syslog.debug(msg)
0
+ LOG.log(watch, :info, msg % [])
0
+ end
0
+
0
+ # log
0
+ msg = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
0
+ Syslog.debug(msg)
0
+ LOG.log(watch, :debug, msg)
0
+ end
0
+
0
     def self.dest_desc(metric, condition)
0
- if metric.destination
0
- metric.destination.inspect
0
+ if condition.transition
0
+ {true => condition.transition}.inspect
0
       else
0
- if condition.transition
0
- {true => condition.transition}.inspect
0
+ if metric.destination
0
+ metric.destination.inspect
0
         else
0
           'none'
0
         end
...
10
11
12
 
 
 
 
 
 
 
 
 
 
 
 
13
14
15
...
21
22
23
24
25
26
27
28
29
30
31
...
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
...
33
34
35
 
 
 
 
 
36
37
38
0
@@ -10,6 +10,18 @@ God.watch do |w|
0
     end
0
   end
0
   
0
+ w.restart_if do |restart|
0
+ restart.condition(:cpu_usage) do |c|
0
+ c.above = 50.percent
0
+ c.times = [3, 5]
0
+ end
0
+
0
+ restart.condition(:memory_usage) do |c|
0
+ c.above = 10.megabytes
0
+ c.times = [3, 5]
0
+ end
0
+ end
0
+
0
   # lifecycle
0
   w.lifecycle do |on|
0
     on.condition(:flapping) do |c|
0
@@ -21,10 +33,5 @@ God.watch do |w|
0
       c.retry_times = 2
0
       c.retry_within = 5.minutes
0
     end
0
-
0
- on.condition(:cpu_usage) do |c|
0
- c.above = 10.percent
0
- c.times = [3, 5]
0
- end
0
   end
0
 end
0
\ No newline at end of file
...
1
2
3
4
 
 
 
 
 
 
 
 
 
 
5
...
1
2
 
3
4
5
6
7
8
9
10
11
12
13
14
0
@@ -1,3 +1,12 @@
0
 #! /usr/bin/env ruby
0
 
0
-loop { STDOUT.puts('server'); STDOUT.flush; sleep 1 }
0
\ No newline at end of file
0
+data = ''
0
+
0
+loop do
0
+ STDOUT.puts('server');
0
+ STDOUT.flush;
0
+
0
+ 100000.times { data << 'x' }
0
+
0
+ sleep 0.1
0
+end
0
\ No newline at end of file

Comments

    No one has commented yet.