Skip to content

Commit

Permalink
Fix: systemd: Ensure failed monitor operations always return
Browse files Browse the repository at this point in the history
  • Loading branch information
beekhof committed Oct 31, 2014
1 parent a7cf095 commit fccd046
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 35 deletions.
5 changes: 3 additions & 2 deletions include/crm/services.h
Expand Up @@ -262,8 +262,9 @@ enum nagios_exitcode {
*/
svc_action_t *services_action_create_generic(const char *exec, const char *args[]);

void
services_action_free(svc_action_t * op);
void services_action_cleanup(svc_action_t * op);

void services_action_free(svc_action_t * op);

gboolean services_action_sync(svc_action_t * op);

Expand Down
7 changes: 6 additions & 1 deletion lib/cib/cib_utils.c
Expand Up @@ -112,8 +112,13 @@ get_cib_copy(cib_t * cib)
{
xmlNode *xml_cib;
int options = cib_scope_local | cib_sync_call;
int rc = cib->cmds->query(cib, NULL, &xml_cib, options);
int rc = pcmk_ok;

if (cib->state == cib_disconnected) {
return NULL;
}

rc = cib->cmds->query(cib, NULL, &xml_cib, options);
if (rc == -EACCES) {
return NULL;

Expand Down
26 changes: 16 additions & 10 deletions lib/services/dbus.c
Expand Up @@ -145,7 +145,7 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D
return reply;
}

bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
DBusPendingCall* pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
void(*done)(DBusPendingCall *pending, void *user_data), void *user_data)
{
DBusError error;
Expand All @@ -161,30 +161,31 @@ bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
// send message and get a handle for a reply
if (!dbus_connection_send_with_reply (connection, msg, &pending, -1/* aka. DBUS_TIMEOUT_USE_DEFAULT */)) { // -1 is default timeout
crm_err("Send with reply failed for %s", method);
return FALSE;
return NULL;

} else if (pending == NULL) {
crm_err("No pending call found for %s", method);
return FALSE;
return NULL;

}

if (done && dbus_pending_call_get_completed(pending)) {
crm_info("DBus %s call completed too soon");
#if 1
crm_trace("DBus %s call sent", method);
if (dbus_pending_call_get_completed(pending)) {
crm_info("DBus %s call completed too soon", method);
if(done) {
#if 0
/* This sounds like a good idea, but allegedly it breaks things */
done(pending, user_data);
pending = NULL;
#else
CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL));
#endif
}

} else if(done) {
CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL));

} else {
crm_info("DBus %s call completed too soon");
}
return TRUE;
return pending;
}

bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line)
Expand Down Expand Up @@ -289,6 +290,11 @@ pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data)
dbus_message_iter_next (&dict);
}

if(data->name && data->callback) {
crm_trace("No value for property %s[%s]", data->object, data->name);
data->callback(data->name, NULL, data->userdata);
}

cleanup:
free(data->target);
free(data->object);
Expand Down
2 changes: 1 addition & 1 deletion lib/services/pcmk-dbus.h
Expand Up @@ -2,7 +2,7 @@ DBusConnection *pcmk_dbus_connect(void);
void pcmk_dbus_connection_setup_with_select(DBusConnection *c);
void pcmk_dbus_disconnect(DBusConnection *connection);

bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
DBusPendingCall *pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
void(*done)(DBusPendingCall *pending, void *user_data), void *user_data);
DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error);
bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line);
Expand Down
38 changes: 30 additions & 8 deletions lib/services/services.c
Expand Up @@ -303,18 +303,23 @@ services_action_create_generic(const char *exec, const char *args[])
}

void
services_action_free(svc_action_t * op)
services_action_cleanup(svc_action_t * op)
{
unsigned int i;

if (op == NULL) {
return;
if(op->opaque->timerid != 0) {
crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
g_source_remove(op->opaque->timerid);
op->opaque->timerid = 0;
}

if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
if(op->opaque->pending) {
if(dbus_pending_call_get_completed(op->opaque->pending)) {
crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc);
}
dbus_pending_call_cancel(op->opaque->pending);
dbus_pending_call_unref(op->opaque->pending);
op->opaque->pending = NULL;
}

if (op->opaque->stderr_gsource) {
mainloop_del_fd(op->opaque->stderr_gsource);
op->opaque->stderr_gsource = NULL;
Expand All @@ -324,6 +329,23 @@ services_action_free(svc_action_t * op)
mainloop_del_fd(op->opaque->stdout_gsource);
op->opaque->stdout_gsource = NULL;
}
}

void
services_action_free(svc_action_t * op)
{
unsigned int i;

if (op == NULL) {
return;
}

services_action_cleanup(op);

if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}

free(op->id);
free(op->opaque->exec);
Expand Down
2 changes: 2 additions & 0 deletions lib/services/services_linux.c
Expand Up @@ -264,6 +264,8 @@ operation_finalize(svc_action_t * op)
services_action_free(op);
return TRUE;
}

services_action_cleanup(op);
return FALSE;
}

Expand Down
8 changes: 8 additions & 0 deletions lib/services/services_private.h
Expand Up @@ -19,6 +19,10 @@
#ifndef __MH_SERVICES_PRIVATE_H__
# define __MH_SERVICES_PRIVATE_H__

#if SUPPORT_DBUS
# include <dbus/dbus.h>
#endif

struct svc_action_private_s {
char *exec;
char *args[255];
Expand All @@ -31,6 +35,10 @@ struct svc_action_private_s {

int stdout_fd;
mainloop_io_t *stdout_gsource;
#if SUPPORT_DBUS
DBusPendingCall* pending;
unsigned timerid;
#endif
};

GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable);
Expand Down
65 changes: 53 additions & 12 deletions lib/services/systemd.c
Expand Up @@ -115,18 +115,18 @@ systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
{
DBusError error;
DBusMessage *reply = NULL;
int *reload_count = user_data;
unsigned int reload_count = GPOINTER_TO_UINT(user_data);

dbus_error_init(&error);
if(pending) {
reply = dbus_pending_call_steal_reply(pending);
}

if(pcmk_dbus_find_error("Reload", pending, reply, &error)) {
crm_err("Could not issue systemd reload %d: %s", *reload_count, error.message);
crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);

} else {
crm_trace("Reload %d complete", *reload_count);
crm_trace("Reload %d complete", reload_count);
}

if(pending) {
Expand All @@ -140,13 +140,18 @@ systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
static bool
systemd_daemon_reload(void)
{
static int reload_count = 0;
static unsigned int reload_count = 0;
const char *method = "Reload";
DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);

CRM_ASSERT(msg != NULL);
pcmk_dbus_send(msg, systemd_proxy, systemd_daemon_reload_complete, &reload_count);
dbus_message_unref(msg);

reload_count++;
if(reload_count % 10 == 0) {
DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);

CRM_ASSERT(msg != NULL);
pcmk_dbus_send(msg, systemd_proxy, systemd_daemon_reload_complete, GUINT_TO_POINTER(reload_count));
dbus_message_unref(msg);
}
return TRUE;
}

Expand Down Expand Up @@ -178,11 +183,17 @@ static void
systemd_loadunit_cb(DBusPendingCall *pending, void *user_data)
{
DBusMessage *reply = NULL;
svc_action_t * op = user_data;

if(pending) {
reply = dbus_pending_call_steal_reply(pending);
}

if(op) {
crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action);
} else {
crm_trace("Got result: %p for %p", reply, pending);
}
systemd_loadunit_result(reply, user_data);

if(reply) {
Expand Down Expand Up @@ -444,6 +455,12 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
reply = dbus_pending_call_steal_reply(pending);
}

if(op) {
crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action);
} else {
crm_trace("Got result: %p for %p", reply, pending);
}
op->opaque->pending = NULL;
systemd_exec_result(reply, op);

if(pending) {
Expand All @@ -460,10 +477,13 @@ static void
systemd_unit_check(const char *name, const char *state, void *userdata)
{
svc_action_t * op = userdata;

CRM_ASSERT(state != NULL);

if (g_strcmp0(state, "active") == 0) {
crm_trace("Resource %s has %s='%s'", op->rsc, name, state);

if(state == NULL) {
op->rc = PCMK_OCF_NOT_RUNNING;

} else if (g_strcmp0(state, "active") == 0) {
op->rc = PCMK_OCF_OK;
} else if (g_strcmp0(state, "activating") == 0) {
op->rc = PCMK_OCF_PENDING;
Expand All @@ -472,6 +492,7 @@ systemd_unit_check(const char *name, const char *state, void *userdata)
}

if (op->synchronous == FALSE) {
op->opaque->pending = NULL;
operation_finalize(op);
}
}
Expand Down Expand Up @@ -562,7 +583,14 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
}

if (op->synchronous == FALSE) {
return pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
DBusPendingCall* pending = pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);

if(pending) {
dbus_pending_call_ref(pending);
op->opaque->pending = pending;
return TRUE;
}
return FALSE;

} else {
DBusError error;
Expand Down Expand Up @@ -593,6 +621,18 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
return op->rc == PCMK_OCF_OK;
}

/*
 * Timer callback for an asynchronous systemd operation that has not
 * finished in time.
 *
 * p is the svc_action_t that was handed to g_timeout_add() when the
 * operation was started. The callback logs a warning, forgets the timer
 * id and passes the action to operation_finalize().
 *
 * Always returns FALSE.
 */
static gboolean
systemd_timeout_callback(gpointer p)
{
    svc_action_t *action = (svc_action_t *) p;

    crm_warn("%s operation on systemd unit %s named '%s' timed out",
             action->action, action->agent, action->rsc);

    /*
     * Clear the id before finalizing: operation_finalize() may clean up or
     * free the action, and cleanup removes any timer whose id is still set.
     */
    action->opaque->timerid = 0;
    operation_finalize(action);

    return FALSE;
}

gboolean
systemd_unit_exec(svc_action_t * op)
{
Expand All @@ -619,6 +659,7 @@ systemd_unit_exec(svc_action_t * op)
free(unit);

if (op->synchronous == FALSE) {
op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op);
return TRUE;
}

Expand Down
9 changes: 8 additions & 1 deletion lib/services/upstart.c
Expand Up @@ -513,8 +513,15 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_BOOLEAN, &arg_wait, DBUS_TYPE_INVALID));

if (op->synchronous == FALSE) {
DBusPendingCall* pending = pcmk_dbus_send(msg, upstart_proxy, upstart_async_dispatch, op);
free(job);
return pcmk_dbus_send(msg, upstart_proxy, upstart_async_dispatch, op);

if(pending) {
dbus_pending_call_ref(pending);
op->opaque->pending = pending;
return TRUE;
}
return FALSE;
}

dbus_error_init(&error);
Expand Down

0 comments on commit fccd046

Please sign in to comment.