/
containerized.rb
337 lines (311 loc) · 9.4 KB
/
containerized.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
##
# =Containerized Models
#
module Containerized
extend ActiveSupport::Concern
class_methods do
# Given a container name, find the local resource
def resource_by_name(name)
return nil if name.blank?
c = Deployment::Container.find_by(name: name)
c = Deployment::Sftp.find_by(name: name) if c.nil?
c = LoadBalancer.find_by(name: name) if c.nil?
c
end
end
# @return [Boolean]
def latest_image?
c = docker_client
# if the container is not on the node, then we _will_ have the latest image
return true if c.nil?
c_image_tag = c.info['Config']['Image']
c_image_id = c.info['Image']
running_image = Docker::Image.get(c_image_id, {}, node.client(3))
running_image.info['RepoTags'].include? c_image_tag
rescue Docker::Error::NotFoundError
true
rescue => e
event.event_details.create!(
data: e.message,
event_code: "20fb4e443fde83de"
)
false
end
def halt_auto_recovery?
error_events = event_logs.where( Arel.sql(%Q(created_at >= '#{1.hour.ago.iso8601}')) ).starting.failed.count
start_events = event_logs.where( Arel.sql(%Q(created_at >= '#{30.minutes.ago.iso8601}')) ).starting.count
stop_events = event_logs.where( Arel.sql(%Q(created_at >= '#{30.minutes.ago.iso8601}')) ).starting.count
error_events > 3 || start_events > 6 || stop_events > 6
end
# Given a raw status from Docker, do we need to intervene?
# @param [String] docker_status
# @return [Boolean]
def requires_intervention?(docker_status)
return true if docker_status == 'running' && !active?
%w[created stopped exited].include?(docker_status) && active?
end
def metadata_env_params
[
%W[METADATA_SERVICE http://metadata.internal:8500/v1/kv/projects/#{deployment.token}],
%W[METADATA_URL http://metadata.internal:8500/v1/kv/projects/#{deployment.token}/metadata?raw=true],
%W[METADATA_AUTH #{deployment.consul_auth_key}]
]
end
# This is used by both Container and SFTP.
def build!(event)
build_client = if self.kind_of? Deployment::Container
runtime_config event&.audit
elsif self.kind_of? Deployment::Sftp
build_command
else
nil
end
return false if build_client.nil?
Docker::Container.create(build_client, node.client).is_a? Docker::Container
rescue Docker::Error::ConflictError => e
event.event_details.create!(
data: e.message,
event_code: "f6e058590c10e99e"
)
false
end
# @param [EventLog] event
def can_build?(event)
if built?
event.event_details.create!(
data: 'Unable to provision, container is already built; attempting start.',
event_code: '7ebfd45eba7e7dc8'
)
ContainerWorkers::StartWorker.perform_async global_id, event.global_id
return false
end
if node.nil?
event.event_details.create!(
data: "Container has no node",
event_code: 'bf1c1f517b6acefa'
)
event.fail! 'Container has no node'
return false
end
unless node.online?
event.event_details.create!(
data: 'Node is offline',
event_code: '4bea557d3255b1af'
)
event.fail! 'Node is offline'
return false
end
true
end
# @param [Boolean] fast_client
# @return [Docker::Container,nil]
def docker_client(fast_client = false)
Docker::Container.get(self.name, {}, (fast_client ? node.fast_client : node.client(3)))
rescue Excon::Error::Certificate => e
# Unable to connect due to invalid certificateX
ec = '12b4317e8dde17af'
se = SystemEvent.find_by(event_code: ec)
if se.nil?
se = SystemEvent.create!(
message: "Docker Connection Error: #{node&.region&.name}",
log_level: 'warn',
data: { message: nil, count: 0 },
event_code: ec
)
end
data = se.data
data[:message] = e.message
data[:count] = data[:count] + 1
se.update_attribute :data, data
nil
rescue Excon::Error::Socket => e
# Unable to connect due to _missing_ tls certs on docker
ec = '06daa76c88d5f72a'
se = SystemEvent.find_by(event_code: ec)
if se.nil?
se = SystemEvent.create!(
message: "Missing Docker TLS Cert on Node: #{node&.region&.name}",
log_level: 'warn',
data: { message: nil, count: 0 },
event_code: ec
)
end
data = se.data
data[:message] = e.message
data[:count] = data[:count] + 1
se.update_attribute :data, data
nil
rescue
nil
end
# @param [Array] command
# @param [EventLog,nil] event
def container_exec!(command, event, timeout = 10)
result = []
client = docker_client
return { response: [], exit_code: 2 } if client.nil?
response = client.exec(command, wait: timeout)
exit_code = 0
result << response[0]
result << response[1] unless response[1].empty?
exit_code = response[2] if response[2] > exit_code
if result.count == 1
result = result[0]
if result.is_a?(Array)
d = []
result.each do |ii|
d << if ii.kind_of?(Array)
ii.join(" ")
else
ii
end
end
result = d.join(" ")
end
end
if event
event.event_details.create!(
event_code: '76ed2ba5c0ef8883',
data: "Exit Code: #{exit_code}\n\nResponse: #{result}"
)
end
{
response: result,
exit_code: exit_code
}
end
##
# =Load Available Volumes
#
# Response
# volumes = [{
# 'service' => service name,
# 'volume' => Integer,
# 'label' => String
# }]
# def volumes
# Volume.find_by_node(self.node, self.deployment, self.class == Deployment::Sftp)
# end
# Real time lookup attached volumes
#
# Response
# volumes = [{
# 'service' => service name,
# 'volume' => Integer,
# 'label' => String
# }]
def attached_volumes
result = []
vol_ids = []
c = docker_client
binds = c.info.dig('HostConfig', 'Binds')
binds = [] if binds.nil?
binds.each do |m|
id = m.split(":").first.strip
vol_ids << id unless vol_ids.include?(id)
end
if c.info['Volumes']
vols = c.info['Volumes']
vols.each_with_index do |(k, v), i|
id = Volume.name_by_path(v)
vol_ids << id if id && !vol_ids.include?(id)
end
end
vol_ids.each do |i|
vol = Volume.find_by(name: i)
if vol
result << {
'service' => vol.container_service&.name,
'volume' => vol.name,
'label' => vol.label.blank? ? vol.container_service.label : vol.label
}
end
end
result
rescue
[]
end
def can_power?
!%w[removing rebuilding building migrating].include?(status)
end
##
# Container Status
# low-level api to grab raw container state
#
# running,stopped,error,degraded
#
# degraded = running but healthcheck failed
##
# @return [Hash,nil] { state: state, health: { state: string, count: int, log: [] } }
def container_status(raw = {})
return nil if %w[building migrating pending rebuilding trashed].include? status
if raw.empty?
if node.nil?
update status: 'pending'
nil
end
return nil unless node.online?
elsif raw['State'].nil?
return nil
end
state_raw = raw.empty? ? docker_client(true).info['State'] : raw
state = (state_raw['Running'] || state_raw['Restarting']) ? 'running' : 'stopped'
state = 'error' if state == 'stopped' && !state_raw['ExitCode'].zero?
if state_raw['Health'].nil?
update status: state
return { state: state, health: {} }
end
if state == 'running'
state = 'degraded' if state_raw['Health']['Status'] == 'unhealthy'
end
update status: state
{ state: state, health: { state: state_raw['Health']['Status'], count: state_raw['Health']['FailingStreak'], log: state_raw['Health']['Log'] }}
rescue
nil
end
##
# Generate the Docker LogDriver config hash
# @return [Hash]
def log_driver_config
##
# Temporarily find `loki` to ensure it exists on customer deployments!
f = Feature.find_by(name: 'loki')
f = Feature.create!(name: 'loki', maintenance: false, active: true) if f.nil?
ff = Feature.find_by(name: 'loki_fluentd')
ff = Feature.create!(name: 'loki_fluentd', maintenance: false, active: false) if ff.nil?
if f.active
loki_labels = ['container_name={{.Name}}']
loki_labels << "project_id=#{deployment.id}" if deployment
loki_labels << "service_id=#{service.id}" if kind_of?(Deployment::Container)
if ff.active # Fluentd
{
'Type' => 'fluentd',
'Config' => {
'fluentd-address' => 'tcp://localhost:9432',
'labels' => "com.computestacks.deployment_id,com.computestacks.service_id"
}
}
else
{
'Type' => 'loki',
'Config' => {
'max-size' => '5m',
'max-file' => '2',
'loki-url' => "#{region.loki_container_endpoint}/loki/api/v1/push",
'loki-retries' => region.loki_retries,
'loki-batch-size' => region.loki_batch_size,
'loki-external-labels' => "#{loki_labels.join(',')}"
}
}
end
else
{
'Type' => 'json-file',
'Config' => {
'max-size' => '5m',
'max-file' => '2'
}
}
end
end
end