Skip to content

Commit bde602c

Browse files
authored
Merge pull request #1243 from kgaillot/fail
Track resource failures per operation
2 parents 00ef0c1 + 4f7420e commit bde602c

30 files changed

+1052
-474
lines changed

attrd/attrd_common.c

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -248,13 +248,16 @@ attrd_expand_value(const char *value, const char *old_value)
248248
*
249249
* \param[out] regex Where to store created regular expression
250250
* \param[in] rsc Name of resource to clear (or NULL for all)
251+
* \param[in] op Operation to clear if rsc is specified (or NULL for all)
252+
* \param[in] interval Interval of operation to clear if op is specified
251253
*
252254
* \return pcmk_ok on success, -EINVAL if arguments are invalid
253255
*
254256
* \note The caller is responsible for freeing the result with regfree().
255257
*/
256258
int
257-
attrd_failure_regex(regex_t *regex, const char *rsc)
259+
attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
260+
int interval)
258261
{
259262
char *pattern = NULL;
260263
int rc;
@@ -263,8 +266,11 @@ attrd_failure_regex(regex_t *regex, const char *rsc)
263266

264267
if (rsc == NULL) {
265268
pattern = strdup(ATTRD_RE_CLEAR_ALL);
266-
} else {
269+
} else if (op == NULL) {
267270
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
271+
} else {
272+
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
273+
rsc, op, interval);
268274
}
269275

270276
/* Compile pattern into regular expression */

attrd/attrd_common.h

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,23 @@ int attrd_expand_value(const char *value, const char *old_value);
2727
#define ATTRD_RE_CLEAR_ALL \
2828
"^(" CRM_FAIL_COUNT_PREFIX "|" CRM_LAST_FAILURE_PREFIX ")-"
2929

30-
/* regular expression to clear failure of one resource */
31-
/* format takes resource name */
32-
#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s$"
30+
/* regular expression to clear failure of all operations for one resource
31+
* (format takes resource name)
32+
*
33+
* @COMPAT attributes set < 1.1.17:
34+
* also match older attributes that do not have the operation part
35+
*/
36+
#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$"
37+
38+
/* regular expression to clear failure of one operation for one resource
39+
* (format takes resource name, operation name, and interval)
40+
*
41+
* @COMPAT attributes set < 1.1.17:
42+
* also match older attributes that do not have the operation part
43+
*/
44+
#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%d)?$"
3345

34-
int attrd_failure_regex(regex_t *regex, const char *rsc);
46+
int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
47+
int interval);
3548

3649
#endif /* PCMK_ATTRD_COMMON__H */

attrd/commands.c

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -332,24 +332,39 @@ attrd_client_clear_failure(xmlNode *xml)
332332
}
333333
#endif
334334

335-
const char *rsc = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
335+
const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE);
336+
const char *op = crm_element_value(xml, F_ATTRD_OPERATION);
337+
const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL);
336338

337-
/* Map this to an update that uses a regular expression */
339+
/* Map this to an update */
338340
crm_xml_add(xml, F_ATTRD_TASK, ATTRD_OP_UPDATE);
339341

340-
/* Add expression matching one or all resources as appropriate */
342+
/* Add regular expression matching desired attributes */
343+
341344
if (rsc) {
342-
char *pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
345+
char *pattern;
346+
347+
if (op == NULL) {
348+
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
349+
350+
} else {
351+
int interval = crm_get_interval(interval_s);
352+
353+
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
354+
rsc, op, interval);
355+
}
343356

344357
crm_xml_add(xml, F_ATTRD_REGEX, pattern);
345-
crm_xml_replace(xml, F_ATTRD_ATTRIBUTE, NULL);
346358
free(pattern);
347359

348360
} else {
349361
crm_xml_add(xml, F_ATTRD_REGEX, ATTRD_RE_CLEAR_ALL);
350362
}
351363

352-
/* Delete the value */
364+
/* Make sure attribute and value are not set, so we delete via regex */
365+
if (crm_element_value(xml, F_ATTRD_ATTRIBUTE)) {
366+
crm_xml_replace(xml, F_ATTRD_ATTRIBUTE, NULL);
367+
}
353368
if (crm_element_value(xml, F_ATTRD_VALUE)) {
354369
crm_xml_replace(xml, F_ATTRD_VALUE, NULL);
355370
}
@@ -504,20 +519,28 @@ attrd_client_query(crm_client_t *client, uint32_t id, uint32_t flags, xmlNode *q
504519
static void
505520
attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml)
506521
{
507-
const char *rsc = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
522+
const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE);
508523
const char *host = crm_element_value(xml, F_ATTRD_HOST);
524+
const char *op = crm_element_value(xml, F_ATTRD_OPERATION);
525+
const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL);
526+
int interval = crm_get_interval(interval_s);
509527
char *attr = NULL;
510528
GHashTableIter iter;
511529
regex_t regex;
512530

513-
if (attrd_failure_regex(&regex, rsc) != pcmk_ok) {
531+
if (attrd_failure_regex(&regex, rsc, op, interval) != pcmk_ok) {
514532
crm_info("Ignoring invalid request to clear failures for %s",
515533
(rsc? rsc : "all resources"));
516534
return;
517535
}
518536

519537
crm_xml_add(xml, F_ATTRD_TASK, ATTRD_OP_UPDATE);
520538

539+
/* Make sure value is not set, so we delete */
540+
if (crm_element_value(xml, F_ATTRD_VALUE)) {
541+
crm_xml_replace(xml, F_ATTRD_VALUE, NULL);
542+
}
543+
521544
g_hash_table_iter_init(&iter, attributes);
522545
while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
523546
if (regexec(&regex, attr, 0, NULL, 0) == 0) {
@@ -1030,6 +1053,7 @@ build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const
10301053
for (lpc = 0; uuid[lpc] != 0; lpc++) {
10311054
switch (uuid[lpc]) {
10321055
case ':':
1056+
case '#':
10331057
uuid[lpc] = '.';
10341058
}
10351059
}

attrd/legacy.c

Lines changed: 78 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -237,19 +237,27 @@ find_hash_entry(xmlNode * msg)
237237
static void
238238
local_clear_failure(xmlNode *xml)
239239
{
240-
const char *rsc = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
240+
const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE);
241241
const char *what = rsc? rsc : "all resources";
242+
const char *op = crm_element_value(xml, F_ATTRD_OPERATION);
243+
const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL);
244+
int interval = crm_get_interval(interval_s);
242245
regex_t regex;
243246
GHashTableIter iter;
244247
attr_hash_entry_t *hash_entry = NULL;
245248

246-
if (attrd_failure_regex(&regex, rsc) != pcmk_ok) {
249+
if (attrd_failure_regex(&regex, rsc, op, interval) != pcmk_ok) {
247250
crm_info("Ignoring invalid request to clear %s",
248251
(rsc? rsc : "all resources"));
249252
return;
250253
}
251254
crm_debug("Clearing %s locally", what);
252255

256+
/* Make sure value is not set, so we delete */
257+
if (crm_element_value(xml, F_ATTRD_VALUE)) {
258+
crm_xml_replace(xml, F_ATTRD_VALUE, NULL);
259+
}
260+
253261
g_hash_table_iter_init(&iter, attr_hash);
254262
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &hash_entry)) {
255263
if (regexec(&regex, hash_entry->id, 0, NULL, 0) == 0) {
@@ -282,15 +290,40 @@ remote_clear_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
282290
"/" XML_CIB_TAG_STATE "[@" XML_NODE_IS_REMOTE "='true']" x \
283291
"/" XML_TAG_TRANSIENT_NODEATTRS "/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR
284292

293+
/* xpath component to match an attribute name exactly */
294+
#define XPATH_NAME_IS(x) "@" XML_NVPAIR_ATTR_NAME "='" x "'"
295+
296+
/* xpath component to match an attribute name by prefix */
297+
#define XPATH_NAME_START(x) "starts-with(@" XML_NVPAIR_ATTR_NAME ", '" x "')"
298+
285299
/* xpath ending to clear all resources */
286300
#define XPATH_CLEAR_ALL \
287-
"[starts-with(@" XML_NVPAIR_ATTR_NAME ", '" CRM_FAIL_COUNT_PREFIX "-') " \
288-
"or starts-with(@" XML_NVPAIR_ATTR_NAME ", '" CRM_LAST_FAILURE_PREFIX "-')]"
301+
"[" XPATH_NAME_START(CRM_FAIL_COUNT_PREFIX "-") \
302+
" or " XPATH_NAME_START(CRM_LAST_FAILURE_PREFIX "-") "]"
289303

290-
/* xpath ending to clear one resource (format takes resource name x 2) */
304+
/* xpath ending to clear all operations for one resource
305+
* (format takes resource name x 4)
306+
*
307+
* @COMPAT attributes set < 1.1.17:
308+
* also match older attributes that do not have the operation part
309+
*/
291310
#define XPATH_CLEAR_ONE \
292-
"[@" XML_NVPAIR_ATTR_NAME "='" CRM_FAIL_COUNT_PREFIX "-%s' " \
293-
"or @" XML_NVPAIR_ATTR_NAME "='" CRM_LAST_FAILURE_PREFIX "-%s']"
311+
"[" XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s") \
312+
" or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s") \
313+
" or " XPATH_NAME_START(CRM_FAIL_COUNT_PREFIX "-%s#") \
314+
" or " XPATH_NAME_START(CRM_LAST_FAILURE_PREFIX "-%s#") "]"
315+
316+
/* xpath ending to clear one operation for one resource
317+
* (format takes resource name x 2, resource name + operation + interval x 2)
318+
*
319+
* @COMPAT attributes set < 1.1.17:
320+
* also match older attributes that do not have the operation part
321+
*/
322+
#define XPATH_CLEAR_OP \
323+
"[" XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s") \
324+
" or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s") \
325+
" or " XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s#%s_%d") \
326+
" or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s#%s_%d") "]"
294327

295328
/*!
296329
* \internal
@@ -301,8 +334,9 @@ remote_clear_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
301334
static void
302335
remote_clear_failure(xmlNode *xml)
303336
{
304-
const char *rsc = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
337+
const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE);
305338
const char *host = crm_element_value(xml, F_ATTRD_HOST);
339+
const char *op = crm_element_value(xml, F_ATTRD_OPERATION);
306340
int rc = pcmk_ok;
307341
char *xpath;
308342

@@ -313,18 +347,44 @@ remote_clear_failure(xmlNode *xml)
313347
return;
314348
}
315349

316-
if ((rsc == NULL) && (host == NULL)) {
317-
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ALL);
350+
/* Build an xpath to clear appropriate attributes */
351+
352+
if (rsc == NULL) {
353+
/* No resource specified, clear all resources */
354+
355+
if (host == NULL) {
356+
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ALL);
357+
} else {
358+
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ALL,
359+
host);
360+
}
361+
362+
} else if (op == NULL) {
363+
/* Resource but no operation specified, clear all operations */
364+
365+
if (host == NULL) {
366+
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ONE,
367+
rsc, rsc, rsc, rsc);
368+
} else {
369+
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ONE,
370+
host, rsc, rsc, rsc, rsc);
371+
}
318372

319-
} else if (rsc == NULL) {
320-
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ALL,
321-
host);
322-
} else if (host == NULL) {
323-
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ONE,
324-
rsc, rsc);
325373
} else {
326-
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ONE,
327-
host, rsc, rsc);
374+
/* Resource and operation specified */
375+
376+
const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL);
377+
int interval = crm_get_interval(interval_s);
378+
379+
if (host == NULL) {
380+
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_OP,
381+
rsc, rsc, rsc, op, interval,
382+
rsc, op, interval);
383+
} else {
384+
xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_OP,
385+
host, rsc, rsc, rsc, op, interval,
386+
rsc, op, interval);
387+
}
328388
}
329389

330390
crm_trace("Clearing attributes matching %s", xpath);

crmd/attrd.c

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,8 @@ log_attrd_error(const char *host, const char *name, const char *value,
5353

5454
static void
5555
update_attrd_helper(const char *host, const char *name, const char *value,
56-
const char *user_name, gboolean is_remote_node,
57-
char command)
56+
const char *interval, const char *user_name,
57+
gboolean is_remote_node, char command)
5858
{
5959
int rc;
6060
int max = 5;
@@ -78,9 +78,16 @@ update_attrd_helper(const char *host, const char *name, const char *value,
7878
}
7979
}
8080

81-
rc = attrd_update_delegate(attrd_ipc, command, host, name, value,
82-
XML_CIB_TAG_STATUS, NULL, NULL, user_name,
83-
attrd_opts);
81+
if (command) {
82+
rc = attrd_update_delegate(attrd_ipc, command, host, name, value,
83+
XML_CIB_TAG_STATUS, NULL, NULL,
84+
user_name, attrd_opts);
85+
} else {
86+
/* (ab)using name/value as resource/operation */
87+
rc = attrd_clear_delegate(attrd_ipc, host, name, value, interval,
88+
user_name, attrd_opts);
89+
}
90+
8491
if (rc == pcmk_ok) {
8592
break;
8693

@@ -103,21 +110,24 @@ void
103110
update_attrd(const char *host, const char *name, const char *value,
104111
const char *user_name, gboolean is_remote_node)
105112
{
106-
update_attrd_helper(host, name, value, user_name, is_remote_node, 'U');
113+
update_attrd_helper(host, name, value, NULL, user_name, is_remote_node,
114+
'U');
107115
}
108116

109117
void
110118
update_attrd_remote_node_removed(const char *host, const char *user_name)
111119
{
112120
crm_trace("Asking attrd to purge Pacemaker Remote node %s", host);
113-
update_attrd_helper(host, NULL, NULL, user_name, TRUE, 'C');
121+
update_attrd_helper(host, NULL, NULL, NULL, user_name, TRUE, 'C');
114122
}
115123

116124
void
117-
update_attrd_clear_failures(const char *host, const char *rsc,
118-
gboolean is_remote_node)
125+
update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
126+
const char *interval, gboolean is_remote_node)
119127
{
120-
crm_info("Asking attrd to clear failure of %s on %s node %s",
128+
crm_info("Asking attrd to clear failure of %s %s for %s on %s node %s",
129+
(op? op : "all operations"),
130+
(interval? interval : "at all intervals"),
121131
rsc, (is_remote_node? "Pacemaker Remote" : "cluster"), host);
122-
update_attrd_helper(host, rsc, NULL, NULL, is_remote_node, 'c');
132+
update_attrd_helper(host, rsc, op, interval, NULL, is_remote_node, 0);
123133
}

crmd/crmd_lrm.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@
1919
#include <crmd_messages.h>
2020

2121
extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
22-
extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name);
22+
extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name,
23+
const char *operation, int interval);
2324
void lrm_op_callback(lrmd_event_data_t * op);
2425

2526
typedef struct resource_history_s {

crmd/crmd_utils.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,7 @@ void init_transient_attrs(const char *uname, const char *start_state, int option
9494
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
9595
void update_attrd_remote_node_removed(const char *host, const char *user_name);
9696
void update_attrd_clear_failures(const char *host, const char *rsc,
97+
const char *op, const char *interval,
9798
gboolean is_remote_node);
9899

99100
int crmd_join_phase_count(enum crm_join_phase phase);

0 commit comments

Comments
 (0)