Skip to content

Commit

Permalink
Add Cluster Health Check.
Browse files Browse the repository at this point in the history
Fixes #5438
  • Loading branch information
Michael Friedrich committed Feb 7, 2014
1 parent 7d9dc1a commit e534f9b
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 5 deletions.
24 changes: 24 additions & 0 deletions doc/6-advanced-topics.md
Expand Up @@ -273,6 +273,7 @@ You can find the state file in `/var/lib/icinga2/icinga2.state`. Before copying
the state file you should make sure that all your cluster nodes are properly shut
down.


### <a id="assign-services-to-cluster-nodes"></a> Assign Services to Cluster Nodes

By default all services are distributed among the cluster nodes with the `Checker`
Expand All @@ -295,6 +296,29 @@ attribute. Required Endpoints must be defined as array.
> services based on their location, inheriting from a global service template
> defining the authorities.
### <a id="cluster-health-check"></a> Cluster Health Check

The Icinga 2 [ITL](#itl) ships an internal check command that checks all
configured `Endpoint` objects in the cluster setup. The check result becomes
critical if one or more configured nodes are not connected.

Example:

object Host "icinga2a" inherits "generic-host" {
services["cluster"] = {
templates = [ "generic-service" ],
check_interval = 1m,
check_command = "cluster",
authorities = [ "icinga2a" ]
},
}

> **Note**
>
> Each cluster node should execute its own local cluster health check to
> detect network-related connection problems from different points of
> view. Use the `authorities` attribute to assign the service check to
> the configured node.
## <a id="dependencies"></a> Dependencies

Expand Down
3 changes: 3 additions & 0 deletions itl/command-common.conf
Expand Up @@ -238,3 +238,6 @@ object CheckCommand "snmp-uptime" inherits "snmp" {

object CheckCommand "icinga" inherits "icinga-check-command" {
}

/* Check command "cluster"; all of its settings come from the "cluster-check-command" template. */
object CheckCommand "cluster" inherits "cluster-check-command" {
}
6 changes: 6 additions & 0 deletions itl/command.conf
Expand Up @@ -25,6 +25,12 @@ template CheckCommand "icinga-check-command" {
}
}

/* Template for check commands whose "execute" method is the "ClusterCheck" function. */
template CheckCommand "cluster-check-command" {
methods = {
execute = "ClusterCheck"
}
}

template CheckCommand "plugin-check-command" {
methods = {
execute = "PluginCheck"
Expand Down
4 changes: 2 additions & 2 deletions lib/base/application.cpp
Expand Up @@ -765,8 +765,8 @@ void Application::MakeVariablesConstant(void)
ScriptVariable::GetByName("IcingaSysconfDir")->SetConstant(true);
ScriptVariable::GetByName("IcingaLocalStateDir")->SetConstant(true);
ScriptVariable::GetByName("IcingaPkgDataDir")->SetConstant(true);
ScriptVariable::GetByName("IcingaStatePath")->SetConstant(true);
ScriptVariable::GetByName("IcingaPidPath")->SetConstant(true);
ScriptVariable::GetByName("IcingaStatePath")->SetConstant(false);
ScriptVariable::GetByName("IcingaPidPath")->SetConstant(false);
ScriptVariable::GetByName("ApplicationType")->SetConstant(true);
}

Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/icingaapplication.cpp
Expand Up @@ -96,7 +96,7 @@ Dictionary::Ptr IcingaApplication::GetMacros(void) const

/**
 * Retrieves the name of this node.
 *
 * @returns The value of the "IcingaNodeName" script variable.
 */
String IcingaApplication::GetNodeName(void) const
{
	return ScriptVariable::Get("IcingaNodeName");
}

bool IcingaApplication::ResolveMacro(const String& macro, const CheckResult::Ptr&, String *result) const
Expand Down
4 changes: 2 additions & 2 deletions lib/methods/CMakeLists.txt
Expand Up @@ -16,12 +16,12 @@
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

# Sources of the "methods" shared library; every file is listed exactly once.
add_library(methods SHARED
  clusterchecktask.cpp icingachecktask.cpp nullchecktask.cpp nulleventtask.cpp
  pluginchecktask.cpp plugineventtask.cpp pluginnotificationtask.cpp
  randomchecktask.cpp timeperiodtask.cpp
)

# clusterchecktask.cpp includes headers from cluster/, so "cluster" is linked in as well.
target_link_libraries(methods ${Boost_LIBRARIES} base config icinga cluster)

set_target_properties (
methods PROPERTIES
Expand Down
88 changes: 88 additions & 0 deletions lib/methods/clusterchecktask.cpp
@@ -0,0 +1,88 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-present Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/

#include "methods/clusterchecktask.h"
#include "cluster/endpoint.h"
#include "cluster/clusterlistener.h"
#include "icinga/cib.h"
#include "icinga/service.h"
#include "icinga/icingaapplication.h"
#include "base/application.h"
#include "base/objectlock.h"
#include "base/convert.h"
#include "base/utility.h"
#include "base/scriptfunction.h"
#include "base/dynamictype.h"
#include <boost/algorithm/string/join.hpp>

using namespace icinga;

REGISTER_SCRIPTFUNCTION(ClusterCheck, &ClusterCheckTask::ScriptFunc);

/**
 * Check function registered as "ClusterCheck": reports which cluster
 * endpoints are currently connected.
 *
 * For every ClusterListener object all Endpoint objects are visited. Each
 * visited endpoint is counted; an endpoint whose name equals the listener's
 * identity (presumably the local node) is counted but appears in neither
 * the connected nor the not-connected list.
 *
 * The service argument is not used.
 *
 * @returns A check result whose state is StateCritical if at least one
 *          other endpoint is not connected and StateOK otherwise. The
 *          performance data carries the counters "num_endpoints",
 *          "num_conn_endpoints" and "num_not_conn_endpoints"; the check
 *          source is set to this node's name.
 */
CheckResult::Ptr ClusterCheckTask::ScriptFunc(const Service::Ptr&)
{
	double count_endpoints = 0;
	std::vector<String> not_connected_endpoints;
	std::vector<String> connected_endpoints;

	BOOST_FOREACH(const ClusterListener::Ptr& cluster_listener, DynamicType::GetObjects<ClusterListener>()) {
		String identity = cluster_listener->GetIdentity();

		BOOST_FOREACH(const Endpoint::Ptr& endpoint, DynamicType::GetObjects<Endpoint>()) {
			count_endpoints++;

			String name = endpoint->GetName();

			/* Only endpoints other than the listener's own identity are
			 * classified as connected or not connected. */
			if (name != identity) {
				if (endpoint->IsConnected())
					connected_endpoints.push_back(name);
				else
					not_connected_endpoints.push_back(name);
			}
		}
	}

	/* Sort the names so the plugin output is stable between runs. */
	std::sort(not_connected_endpoints.begin(), not_connected_endpoints.end());
	std::sort(connected_endpoints.begin(), connected_endpoints.end());

	ServiceState state = StateOK;
	String output = "Icinga 2 Cluster is running: Connected Endpoints: "+ Convert::ToString(connected_endpoints.size()) + " (" +
	    boost::algorithm::join(connected_endpoints, ",") + ").";

	/* A single unreachable endpoint is enough to turn the result critical. */
	if (!not_connected_endpoints.empty()) {
		state = StateCritical;
		output = "Icinga 2 Cluster Problem: " + Convert::ToString(not_connected_endpoints.size()) +
		    " Endpoints (" + boost::algorithm::join(not_connected_endpoints, ",") + ") not connected.";
	}

	Dictionary::Ptr perfdata = make_shared<Dictionary>();
	perfdata->Set("num_endpoints", count_endpoints);
	perfdata->Set("num_conn_endpoints", connected_endpoints.size());
	perfdata->Set("num_not_conn_endpoints", not_connected_endpoints.size());

	CheckResult::Ptr cr = make_shared<CheckResult>();
	cr->SetOutput(output);
	cr->SetPerformanceData(perfdata);
	cr->SetState(state);
	cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());

	return cr;
}

45 changes: 45 additions & 0 deletions lib/methods/clusterchecktask.h
@@ -0,0 +1,45 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-present Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/

#ifndef CLUSTERCHECKTASK_H
#define CLUSTERCHECKTASK_H

#include "methods/i2-methods.h"
#include "icinga/service.h"

namespace icinga
{

/**
* Cluster check type.
*
* @ingroup methods
*/
class I2_METHODS_API ClusterCheckTask
{
public:
static CheckResult::Ptr ScriptFunc(const Service::Ptr& service);

private:
ClusterCheckTask(void);
};

}

#endif /* CLUSTERCHECKTASK_H */

0 comments on commit e534f9b

Please sign in to comment.