Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(healthcheck) adding visibility to target health status #3232

Merged
merged 2 commits into from
Feb 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 91 additions & 37 deletions kong/api/routes/upstreams.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ local responses = require "kong.tools.responses"
local balancer = require "kong.core.balancer"
local singletons = require "kong.singletons"
local utils = require "kong.tools.utils"
local public = require "kong.tools.public"
local cjson = require "cjson"
local cluster_events = singletons.cluster_events

Expand Down Expand Up @@ -91,6 +92,48 @@ local function post_health(is_healthy)
end


--- Collect the currently active targets of an upstream.
-- Loads every target entry stored for the upstream, orders the entries
-- descending by a "target:created_at:id" key, and keeps only the first
-- entry encountered for each distinct `target` value. A first entry
-- whose weight is 0 hides that target from the result entirely.
-- @param dao_factory DAO factory; only `dao_factory.targets:find_all` is used.
-- @param upstream_id id of the upstream whose targets are listed.
-- @return array of active target entries (carrying `cjson.empty_array_mt`
-- as its metatable) followed by the number of entries in it; when the DAO
-- lookup fails, whatever `app_helpers.yield_error(err)` returns.
local function get_active_targets(dao_factory, upstream_id)
  local rows, err = dao_factory.targets:find_all({ upstream_id = upstream_id })
  if not rows then
    return app_helpers.yield_error(err)
  end

  -- attach a temporary composite key to each row so one sort groups the
  -- rows by target and, within a target, by creation time and id
  for _, row in ipairs(rows) do
    row.order = row.target .. ":" .. row.created_at .. ":" .. row.id
  end

  -- descending order: the greatest key of every target comes first
  table.sort(rows, function(lhs, rhs)
    return rhs.order < lhs.order
  end)

  local visited = {}
  local result = setmetatable({}, cjson.empty_array_mt)
  local count = 0

  for _, row in ipairs(rows) do
    local name = row.target

    if not visited[name] then
      -- only the first row met for a given host:port is considered;
      -- every later row for the same target is skipped
      visited[name] = true

      if row.weight ~= 0 then
        -- the sort key is internal, do not expose it to the client
        row.order = nil

        count = count + 1
        result[count] = row
      end
    end
  end

  return result, count
end


return {
["/upstreams/"] = {
GET = function(self, dao_factory)
Expand Down Expand Up @@ -131,59 +174,70 @@ return {
end,

GET = function(self, dao_factory)
self.params.active = nil
local active, active_n = get_active_targets(dao_factory,
self.params.upstream_id)

local target_history, err = dao_factory.targets:find_all({
upstream_id = self.params.upstream_id,
})
if not target_history then
return app_helpers.yield_error(err)
end
-- for now lets not worry about rolling our own pagination
-- we also end up returning a "backwards" list of targets because
-- of how we sorted- do we care?
return responses.send_HTTP_OK {
total = active_n,
data = active,
}
end,

--sort and walk based on target and creation time
for _, target in ipairs(target_history) do
target.order = target.target .. ":" ..
target.created_at .. ":" .. target.id
end
table.sort(target_history, function(a, b) return a.order > b.order end)
POST = function(self, dao_factory, helpers)
clean_history(self.params.upstream_id, dao_factory)

local seen = {}
local active = setmetatable({}, cjson.empty_array_mt)
local active_n = 0
crud.post(self.params, dao_factory.targets)
end,
},

["/upstreams/:upstream_name_or_id/health/"] = {
before = function(self, dao_factory, helpers)
crud.find_upstream_by_name_or_id(self, dao_factory, helpers)
self.params.upstream_id = self.upstream.id
end,

for _, entry in ipairs(target_history) do
if not seen[entry.target] then
if entry.weight == 0 then
seen[entry.target] = true
GET = function(self, dao_factory)
local upstream_id = self.params.upstream_id
local active, active_n = get_active_targets(dao_factory, upstream_id)

else
entry.order = nil -- dont show our order key to the client
local node_id, err = public.get_node_id()
if err then
ngx.log(ngx.ERR, "failed getting node id: ", err)
end

-- add what we want to send to the client in our array
active_n = active_n + 1
active[active_n] = entry
local health_info
health_info, err = balancer.get_upstream_health(upstream_id)
if err then
ngx.log(ngx.ERR, "failed getting upstream health: ", err)
end

-- track that we found this host:port so we only show
-- the most recent one (kinda)
seen[entry.target] = true
end
end
for _, entry in ipairs(active) do
-- In case of DNS errors when registering a target,
-- that error happens inside lua-resty-dns-client
-- and the end-result is that it just doesn't launch the callback,
-- which means kong.core.balancer and healthchecks don't get
-- notified about the target at all. We extrapolate the DNS error
-- out of the fact that the target is missing from the balancer.
-- Note that lua-resty-dns-client does retry by itself,
-- meaning that if DNS is down and it eventually resumes working, the
-- library will issue the callback and the target will change state.
entry.health = health_info
and (health_info[entry.target] or "DNS_ERROR")
or "HEALTHCHECKS_OFF"
end

-- for now lets not worry about rolling our own pagination
-- we also end up returning a "backwards" list of targets because
-- of how we sorted- do we care?
return responses.send_HTTP_OK {
node_id = node_id,
total = active_n,
data = active,
data = active,
}
end,

POST = function(self, dao_factory, helpers)
clean_history(self.params.upstream_id, dao_factory)

crud.post(self.params, dao_factory.targets)
end,
},

["/upstreams/:upstream_name_or_id/targets/all"] = {
Expand Down
53 changes: 53 additions & 0 deletions kong/core/balancer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,58 @@ local function unsubscribe_from_healthcheck_events(callback)
end


--------------------------------------------------------------------------------
-- Get health information for the addresses of an upstream.
-- Healthchecks count as enabled when any of the active healthy/unhealthy
-- intervals or the passive unhealthy tcp_failures/timeouts/http_failures
-- settings of the upstream is different from 0.
-- @param upstream_id the id of the upstream.
-- @return one of two possible returns:
-- * on success, a table mapping "hostname:port" keys to a status string:
--   "HEALTHY" or "UNHEALTHY" when healthchecks are enabled, and
--   "HEALTHCHECKS_OFF" when they are not. Only addresses with a weight
--   greater than 0 are included;
-- * in case of errors, nil and an error message ("upstream not found",
--   "balancer not found" or "healthchecker not found").
local function get_upstream_health(upstream_id)

  local upstream = get_upstream_by_id(upstream_id)
  if not upstream then
    return nil, "upstream not found"
  end

  local checks = upstream.healthchecks
  local active = checks.active

  local using_hc = active.healthy.interval ~= 0
                or active.unhealthy.interval ~= 0
  if not using_hc then
    -- passive settings are consulted only when no active check is set
    local passive = checks.passive.unhealthy
    using_hc = passive.tcp_failures ~= 0
            or passive.timeouts ~= 0
            or passive.http_failures ~= 0
  end

  local balancer = balancers[upstream_id]
  if not balancer then
    return nil, "balancer not found"
  end

  -- a healthchecker is only required when healthchecks are in use
  local healthchecker
  if using_hc then
    healthchecker = healthcheckers[balancer]
    if not healthchecker then
      return nil, "healthchecker not found"
    end
  end

  local health_info = {}

  for weight, addr, host in balancer:addressIter() do
    if weight > 0 then
      local status
      if not using_hc then
        status = "HEALTHCHECKS_OFF"
      elseif healthchecker:get_target_status(addr.ip, addr.port) then
        status = "HEALTHY"
      else
        status = "UNHEALTHY"
      end

      -- keyed by the target's hostname, not by the resolved address
      health_info[host.hostname .. ":" .. addr.port] = status
    end
  end

  return health_info
end


--------------------------------------------------------------------------------
-- for unit-testing purposes only
local function _get_healthchecker(balancer)
Expand All @@ -864,6 +916,7 @@ return {
post_health = post_health,
subscribe_to_healthcheck_events = subscribe_to_healthcheck_events,
unsubscribe_from_healthcheck_events = unsubscribe_from_healthcheck_events,
get_upstream_health = get_upstream_health,

-- ones below are exported for test purposes only
_create_balancer = create_balancer,
Expand Down
Loading