From 7432157b95198832421ce474f1a84f7d8674a875 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Thu, 29 Oct 2015 12:33:02 +0100 Subject: [PATCH 1/2] Fix: services: Correctly determine if operations are in-flight Previously, we kind of abused the return codes of upstart_job_exec(), systemd_unit_exec() and services_os_action_execute(). For an asynchronous operation, the return code is supposed to tell whether the 'op' should be freed by the caller. It doesn't exactly indicate whether the 'op' is in-flight. For instance, in services_os_action_execute(), if stat() or fork() fail, operation_finalize() will be called for an asynchronous operation, which will free the operation and return TRUE if the operation is non-recurring. But in action_async_helper(), the freed operation would be considered in-flight and added into inflight_ops list. --- lib/services/services.c | 15 ++++++++------- lib/services/services_linux.c | 13 +++++++++++-- lib/services/services_private.h | 2 +- lib/services/systemd.c | 13 +++++++++++-- lib/services/systemd.h | 2 +- lib/services/upstart.c | 17 ++++++++++++++--- lib/services/upstart.h | 2 +- 7 files changed, 47 insertions(+), 17 deletions(-) diff --git a/lib/services/services.c b/lib/services/services.c index 4609a7da00b..2d4eb945020 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -584,21 +584,22 @@ handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_actio static gboolean action_async_helper(svc_action_t * op) { gboolean res = FALSE; + gboolean inflight = FALSE; if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART - res = upstart_job_exec(op, FALSE); + res = upstart_job_exec(op, FALSE, &inflight); #endif } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD - res = systemd_unit_exec(op); + res = systemd_unit_exec(op, &inflight); #endif } else { - res = services_os_action_execute(op, FALSE); + res = services_os_action_execute(op, FALSE, &inflight); } /* keep track of ops that are in-flight to avoid collisions in the same namespace */ - if (op->rsc && res) { + if (op->rsc && inflight) { inflight_ops = g_list_append(inflight_ops, op); } @@ -703,14 +704,14 @@ services_action_sync(svc_action_t * op) op->synchronous = true; if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART - rc = upstart_job_exec(op, TRUE); + rc = upstart_job_exec(op, TRUE, NULL); #endif } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD - rc = systemd_unit_exec(op); + rc = systemd_unit_exec(op, NULL); #endif } else { - rc = services_os_action_execute(op, TRUE); + rc = services_os_action_execute(op, TRUE, NULL); } crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index 57669d9d1c1..a6d26adb080 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -589,9 +589,10 @@ action_synced_wait(svc_action_t * op, sigset_t mask) } -/* Returns FALSE if 'op' should be free'd by the caller */ +/* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ +/* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean -services_os_action_execute(svc_action_t * op, gboolean synchronous) +services_os_action_execute(svc_action_t * op, gboolean synchronous, gboolean * inflight) { int stdout_fd[2]; int stderr_fd[2]; @@ -599,6 +600,10 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) sigset_t old_mask; struct stat st; + if (inflight) { + *inflight = FALSE; + } + if (pipe(stdout_fd) < 0) { crm_err("pipe() failed"); } @@ -702,6 +707,10 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); + + if (inflight) { + *inflight = TRUE; + } } return TRUE; diff --git a/lib/services/services_private.h b/lib/services/services_private.h index a98cd91dd90..87303c39ec8 100644 --- a/lib/services/services_private.h +++ b/lib/services/services_private.h @@ -44,7 +44,7 @@ struct svc_action_private_s { GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable); -gboolean services_os_action_execute(svc_action_t * op, gboolean synchronous); +gboolean services_os_action_execute(svc_action_t * op, gboolean synchronous, gboolean * inflight); GList *resources_os_list_lsb_agents(void); diff --git a/lib/services/systemd.c b/lib/services/systemd.c index ca56915d4a5..c3d9544cac4 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -648,11 +648,17 @@ systemd_timeout_callback(gpointer p) return FALSE; } +/* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ +/* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean -systemd_unit_exec(svc_action_t * op) +systemd_unit_exec(svc_action_t * op, gboolean * inflight) { char *unit = NULL; + if (inflight) { + *inflight = FALSE; + } + CRM_ASSERT(op); CRM_ASSERT(systemd_init()); op->rc = PCMK_OCF_UNKNOWN_ERROR; @@ -665,7 +671,7 @@ systemd_unit_exec(svc_action_t * op) op->rc = PCMK_OCF_OK; if (op->synchronous == FALSE) { - operation_finalize(op); + return operation_finalize(op); } return TRUE; } @@ -675,6 +681,9 @@ systemd_unit_exec(svc_action_t * op) if (op->synchronous == FALSE) { op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); + if (inflight) { + *inflight = TRUE; + } return TRUE; } diff --git a/lib/services/systemd.h b/lib/services/systemd.h index c86bafe5b87..e1cd9591ab1 100644 --- a/lib/services/systemd.h +++ b/lib/services/systemd.h @@ -17,7 +17,7 @@ */ G_GNUC_INTERNAL GList *systemd_unit_listall(void); -G_GNUC_INTERNAL int systemd_unit_exec(svc_action_t * op); +G_GNUC_INTERNAL int systemd_unit_exec(svc_action_t * op, gboolean * inflight); G_GNUC_INTERNAL gboolean systemd_unit_exists(const gchar * name); G_GNUC_INTERNAL gboolean systemd_unit_running(const gchar * name); G_GNUC_INTERNAL void systemd_cleanup(void); diff --git a/lib/services/upstart.c b/lib/services/upstart.c index eb8cfa83131..8b3804a046d 100644 --- a/lib/services/upstart.c +++ b/lib/services/upstart.c @@ -426,8 +426,10 @@ upstart_async_dispatch(DBusPendingCall *pending, void *user_data) } } +/* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ +/* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean -upstart_job_exec(svc_action_t * op, gboolean synchronous) +upstart_job_exec(svc_action_t * op, gboolean synchronous, gboolean * inflight) { char *job = NULL; int arg_wait = TRUE; @@ -439,6 +441,10 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) DBusMessage *reply = NULL; DBusMessageIter iter, array_iter; + if (inflight) { + *inflight = FALSE; + } + op->rc = PCMK_OCF_UNKNOWN_ERROR; CRM_ASSERT(upstart_init()); @@ -481,6 +487,9 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) return op->rc == PCMK_OCF_OK; } else if (pending) { services_set_op_pending(op, pending); + if (inflight) { + *inflight = TRUE; + } return TRUE; } return FALSE; @@ -524,6 +533,9 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) if(pending) { services_set_op_pending(op, pending); + if (inflight) { + *inflight = TRUE; + } return TRUE; } return FALSE; @@ -567,8 +579,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) } if (op->synchronous == FALSE) { - operation_finalize(op); - return TRUE; + return operation_finalize(op); } return op->rc == PCMK_OCF_OK; } diff --git a/lib/services/upstart.h b/lib/services/upstart.h index 889b7b70147..352d594a04d 100644 --- a/lib/services/upstart.h +++ b/lib/services/upstart.h @@ -24,7 +24,7 @@ # include "crm/services.h" G_GNUC_INTERNAL GList *upstart_job_listall(void); -G_GNUC_INTERNAL int upstart_job_exec(svc_action_t * op, gboolean synchronous); +G_GNUC_INTERNAL int upstart_job_exec(svc_action_t * op, gboolean synchronous, gboolean * inflight); G_GNUC_INTERNAL gboolean upstart_job_exists(const gchar * name); G_GNUC_INTERNAL gboolean upstart_job_running(const gchar * name); G_GNUC_INTERNAL void upstart_cleanup(void); From 9ccf764c3194d6c836c752feccd4c5d1ed990bb2 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Thu, 29 Oct 2015 14:48:50 +0100 Subject: [PATCH 2/2] Fix: systemd: Directly return an error if the connection to System DBus is closed According to the documentation of dbus_connection_send_with_reply(), if the connection is disconnected or you try to send Unix file descriptors on a connection that does not support them, the DBusPendingCall will be set to NULL. In this case, we can directly return an error instead of waiting for timeout. --- lib/services/dbus.c | 2 +- lib/services/systemd.c | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/services/dbus.c b/lib/services/dbus.c index d5d68ac5f88..fc6ba660351 100644 --- a/lib/services/dbus.c +++ b/lib/services/dbus.c @@ -176,7 +176,7 @@ DBusPendingCall* pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection, return NULL; } else if (pending == NULL) { - crm_err("No pending call found for %s", method); + crm_err("No pending call found for %s: Connection to System DBus may be closed", method); return NULL; } diff --git a/lib/services/systemd.c b/lib/services/systemd.c index c3d9544cac4..5513687c22c 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -529,9 +529,10 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) } else if (pending) { services_set_op_pending(op, pending); return TRUE; - } - return FALSE; + } else { + return operation_finalize(op); + } } else if (g_strcmp0(method, "start") == 0) { FILE *file_strm = NULL; @@ -611,8 +612,10 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) if(pending) { services_set_op_pending(op, pending); return TRUE; + + } else { + return operation_finalize(op); } - return FALSE; } else { DBusError error; @@ -680,11 +683,16 @@ systemd_unit_exec(svc_action_t * op, gboolean * inflight) free(unit); if (op->synchronous == FALSE) { - op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); - if (inflight) { - *inflight = TRUE; + if (op->opaque->pending) { + op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); + if (inflight) { + *inflight = TRUE; + } + return TRUE; + + } else { + return operation_finalize(op); } - return TRUE; } return op->rc == PCMK_OCF_OK;