Skip to content

Commit

Permalink
Cluster: Periodically dump status json.
Browse files Browse the repository at this point in the history
Refs #5444
  • Loading branch information
Michael Friedrich committed Feb 13, 2014
1 parent 48a06d3 commit f5d40ba
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 10 deletions.
1 change: 1 addition & 0 deletions components/cluster/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ install(TARGETS cluster RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR} LIBRARY DES
#install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/lib/icinga2/cluster\")")
install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/lib/icinga2/cluster/config\")")
install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/lib/icinga2/cluster/log\")")
install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/icinga2/cluster\")")

5 changes: 4 additions & 1 deletion components/cluster/cluster-type.conf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ type ClusterListener {

%attribute array "peers" {
%attribute name(Endpoint) "*"
}
},

%attribute string "status_path",
%attribute number "status_update_interval"
}

type Endpoint {
Expand Down
145 changes: 145 additions & 0 deletions components/cluster/clusterlistener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "cluster/clusterlistener.h"
#include "cluster/endpoint.h"
#include "icinga/cib.h"
#include "icinga/domain.h"
#include "icinga/icingaapplication.h"
#include "base/netstring.h"
Expand Down Expand Up @@ -119,6 +120,12 @@ void ClusterListener::Start(void)
}
}
}

m_StatusTimer = make_shared<Timer>();
m_StatusTimer->SetInterval(GetStatusUpdateInterval());
m_StatusTimer->OnTimerExpired.connect(boost::bind(&ClusterListener::StatusTimerHandler, this));
m_StatusTimer->Start();
m_StatusTimer->Reschedule(0);
}

/**
Expand Down Expand Up @@ -1558,3 +1565,141 @@ bool ClusterListener::SupportsNotifications(void)

return !type->GetObjects().empty() && IcingaApplication::GetInstance()->GetEnableNotifications();
}

bool ClusterListener::SupportsFeature(const String& name)
{
DynamicType::Ptr type = DynamicType::GetByName(name);

if (!type)
return false;

return !type->GetObjects().empty();
}

void ClusterListener::StatusTimerHandler(void)
{
Log(LogInformation, "cluster", "Writing cluster.json file");

String statuspath = GetStatusPath();
String statuspathtmp = statuspath + ".tmp"; /* XXX make this a global definition */

std::ofstream statusfp;
statusfp.open(statuspathtmp.CStr(), std::ofstream::out | std::ofstream::trunc);

statusfp << std::fixed;

statusfp << JsonSerialize(GetClusterStatus());

statusfp.close();

#ifdef _WIN32
_unlink(statuspath.CStr());
#endif /* _WIN32 */

if (rename(statuspathtmp.CStr(), statuspath.CStr()) < 0) {
BOOST_THROW_EXCEPTION(posix_error()
<< boost::errinfo_api_function("rename")
<< boost::errinfo_errno(errno)
<< boost::errinfo_file_name(statuspathtmp));
}

Log(LogInformation, "cluster", "Finished writing cluster.json file");
}

Dictionary::Ptr ClusterListener::GetClusterStatus(void)
{
Dictionary::Ptr bag = make_shared<Dictionary>();

/* cluster stats */
bag->Set("node", IcingaApplication::GetInstance()->GetNodeName());
bag->Set("identity", GetIdentity());

double count_endpoints = 0;
Array::Ptr not_connected_endpoints = make_shared<Array>();
Array::Ptr connected_endpoints = make_shared<Array>();

BOOST_FOREACH(const Endpoint::Ptr& endpoint, DynamicType::GetObjects<Endpoint>()) {
count_endpoints++;

if(!endpoint->IsConnected() && endpoint->GetName() != GetIdentity())
not_connected_endpoints->Add(endpoint->GetName());
else if(endpoint->IsConnected() && endpoint->GetName() != GetIdentity())
connected_endpoints->Add(endpoint->GetName());
}

std::sort(not_connected_endpoints->Begin(), not_connected_endpoints->End());
std::sort(connected_endpoints->Begin(), connected_endpoints->End());

bag->Set("num_endpoints", count_endpoints);
bag->Set("num_conn_endpoints", connected_endpoints->GetLength());
bag->Set("num_not_conn_endpoints", not_connected_endpoints->GetLength());
bag->Set("conn_endpoints", connected_endpoints);
bag->Set("not_conn_endpoints", not_connected_endpoints);

/* features */
bag->Set("feature_CheckerComponent", SupportsChecks() ? 1 : 0);
bag->Set("feature_NotificationComponent", SupportsNotifications() ? 1 : 0);

/* XXX find a more generic way of getting features as a list */
bag->Set("feature_IdoMysqlConnection", SupportsFeature("IdoMysqlConnection") ? 1 : 0);
bag->Set("feature_IdoPgsqlConnection", SupportsFeature("IdoPgsqlConnection") ? 1 : 0);
bag->Set("feature_StatusDataWriter", SupportsFeature("StatusDataWriter") ? 1 : 0);
bag->Set("feature_CompatLogger", SupportsFeature("CompatLogger") ? 1 : 0);
bag->Set("feature_ExternalCommandListener", SupportsFeature("ExternalCommandListener") ? 1 : 0);
bag->Set("feature_CheckResultReader", SupportsFeature("CheckResultReader") ? 1 : 0);
bag->Set("feature_LivestatusListener", SupportsFeature("LivestatusListener") ? 1 : 0);
bag->Set("feature_GraphiteWriter", SupportsFeature("GraphiteWriter") ? 1 : 0);
bag->Set("feature_PerfdataWriter", SupportsFeature("PerfdataWriter") ? 1 : 0);
bag->Set("feature_FileLogger", SupportsFeature("FileLogger") ? 1 : 0);
bag->Set("feature_SyslogLogger", SupportsFeature("SyslogLogger") ? 1 : 0);


/* icinga stats */
double interval = Utility::GetTime() - Application::GetStartTime();

if (interval > 60)
interval = 60;

bag->Set("active_checks", CIB::GetActiveChecksStatistics(interval) / interval);
bag->Set("passive_checks", CIB::GetPassiveChecksStatistics(interval) / interval);

bag->Set("active_checks_1min", CIB::GetActiveChecksStatistics(60));
bag->Set("passive_checks_1min", CIB::GetPassiveChecksStatistics(60));
bag->Set("active_checks_5min", CIB::GetActiveChecksStatistics(60 * 5));
bag->Set("passive_checks_5min", CIB::GetPassiveChecksStatistics(60 * 5));
bag->Set("active_checks_15min", CIB::GetActiveChecksStatistics(60 * 15));
bag->Set("passive_checks_15min", CIB::GetPassiveChecksStatistics(60 * 15));

ServiceCheckStatistics scs = CIB::CalculateServiceCheckStats();

bag->Set("min_latency", scs.min_latency);
bag->Set("max_latency", scs.max_latency);
bag->Set("avg_latency", scs.avg_latency);
bag->Set("min_execution_time", scs.min_latency);
bag->Set("max_execution_time", scs.max_latency);
bag->Set("avg_execution_time", scs.avg_execution_time);

ServiceStatistics ss = CIB::CalculateServiceStats();

bag->Set("num_services_ok", ss.services_ok);
bag->Set("num_services_warning", ss.services_warning);
bag->Set("num_services_critical", ss.services_critical);
bag->Set("num_services_unknown", ss.services_unknown);
bag->Set("num_services_pending", ss.services_pending);
bag->Set("num_services_unreachable", ss.services_unreachable);
bag->Set("num_services_flapping", ss.services_flapping);
bag->Set("num_services_in_downtime", ss.services_in_downtime);
bag->Set("num_services_acknowledged", ss.services_acknowledged);

HostStatistics hs = CIB::CalculateHostStats();

bag->Set("num_hosts_up", hs.hosts_up);
bag->Set("num_hosts_down", hs.hosts_down);
bag->Set("num_hosts_unreachable", hs.hosts_unreachable);
bag->Set("num_hosts_flapping", hs.hosts_flapping);
bag->Set("num_hosts_in_downtime", hs.hosts_in_downtime);
bag->Set("num_hosts_acknowledged", hs.hosts_acknowledged);

return bag;
}

6 changes: 6 additions & 0 deletions components/cluster/clusterlistener.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class ClusterListener : public ObjectImpl<ClusterListener>
Timer::Ptr m_ClusterTimer;
void ClusterTimerHandler(void);

Timer::Ptr m_StatusTimer;
void StatusTimerHandler(void);

std::set<TcpSocket::Ptr> m_Servers;

void AddListener(const String& service);
Expand Down Expand Up @@ -107,12 +110,15 @@ class ClusterListener : public ObjectImpl<ClusterListener>

static bool SupportsChecks(void);
static bool SupportsNotifications(void);
static bool SupportsFeature(const String& name);

void SetSecurityInfo(const Dictionary::Ptr& message, const DynamicObject::Ptr& object, int privs);

void PersistMessage(const Endpoint::Ptr& source, const Dictionary::Ptr& message);

static void MessageExceptionHandler(boost::exception_ptr exp);

Dictionary::Ptr GetClusterStatus(void);
};

}
Expand Down
7 changes: 7 additions & 0 deletions components/cluster/clusterlistener.ti
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "base/dynamicobject.h"
#include "base/application.h"

namespace icinga
{
Expand All @@ -14,6 +15,12 @@ class ClusterListener : DynamicObject
[config] Array::Ptr peers;
[state] double log_message_timestamp;
String identity;
[config] String status_path {
default {{{ return Application::GetLocalStateDir() + "/cache/icinga2/cluster/cluster.json"; }}}
};
[config] double status_update_interval {
default {{{ return 15; }}}
};
};

}
20 changes: 11 additions & 9 deletions doc/4.3-object-types.md
Original file line number Diff line number Diff line change
Expand Up @@ -851,15 +851,17 @@ Example:

Attributes:

Name |Description
----------------|----------------
cert\_path |**Required.** Path to the public key.
key\_path |**Required.** Path to the private key.
ca\_path |**Required.** Path to the CA certificate file.
crl\_path |**Optional.** Path to the CRL file.
bind\_host |**Optional.** The IP address the cluster listener should be bound to.
bind\_port |**Optional.** The port the cluster listener should be bound to.
peers |**Optional.** A list of
Name |Description
--------------------------|--------------------------
cert\_path |**Required.** Path to the public key.
key\_path |**Required.** Path to the private key.
ca\_path |**Required.** Path to the CA certificate file.
crl\_path |**Optional.** Path to the CRL file.
bind\_host |**Optional.** The IP address the cluster listener should be bound to.
bind\_port |**Optional.** The port the cluster listener should be bound to.
peers |**Optional.** A list of
status\_path |**Optional.** Path to cluster status file. Defaults to IcingaLocalStateDir + "/cache/icinga2/cluster/cluster.json"
status\_update\_interval |**Optional.** The interval in which the status files are updated. Defaults to 15 seconds.

### <a id="objecttype-endpoint"></a> Endpoint

Expand Down

0 comments on commit f5d40ba

Please sign in to comment.