Skip to content

Commit

Permalink
Allow for almost any node state flags to be in SuspendExcStates
Browse files Browse the repository at this point in the history
Continuation of 94ce256.

Bug 15184
  • Loading branch information
ScottHilton authored and gaijin03 committed Jan 20, 2023
1 parent 9e8efab commit 5f2cba5
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 21 deletions.
6 changes: 4 additions & 2 deletions doc/html/elastic_computing.shtml
Expand Up @@ -141,8 +141,10 @@ Multiple partitions may be specified using a comma separator.
By default, no nodes are excluded.
<dt><b>SuspendExcStates</b>
<dd>Specifies node states that are not to be powered down automatically.
Valid options include Down, Drain and Planned. By default, any of these states,
if idle for <b>SuspendTime</b>, would be powered down.
Valid states include CLOUD, DOWN, DRAIN, DYNAMIC_FUTURE, DYNAMIC_NORM, FAIL,
INVALID_REG, MAINTENANCE, NOT_RESPONDING, PERFCTRS, PLANNED, and RESERVED.
By default, any of these states, if idle for <b>SuspendTime</b>, would be
powered down.
<dt><b>SuspendProgram</b>
<dd>The program executed when a node is no longer required and can be
relinquished to the cloud.
Expand Down
8 changes: 5 additions & 3 deletions doc/html/power_save.shtml
Expand Up @@ -116,9 +116,11 @@ Multiple partitions may be specified using a comma separator.
By default, no nodes are excluded.</li>

<li><b>SuspendExcStates</b>:
Specifies node states that are not to be powered down automatically. Valid
options include Down, Drain and Planned. By default, any of these states, if
idle for <b>SuspendTime</b>, would be powered down.
Specifies node states that are not to be powered down automatically.
Valid states include CLOUD, DOWN, DRAIN, DYNAMIC_FUTURE, DYNAMIC_NORM, FAIL,
INVALID_REG, MAINTENANCE, NOT_RESPONDING, PERFCTRS, PLANNED, and RESERVED.
By default, any of these states, if idle for <b>SuspendTime</b>, would be
powered down.

<li><b>BatchStartTimeout</b>:
Specifies how long to wait after a batch job start request is issued
Expand Down
3 changes: 2 additions & 1 deletion doc/man/man5/slurm.conf.5
Expand Up @@ -5271,7 +5271,8 @@ By default no nodes are excluded.
.TP
\fBSuspendExcStates\fR
Specifies node states that are not to be powered down automatically.
Valid options include Down, Drain and Planned.
Valid states include CLOUD, DOWN, DRAIN, DYNAMIC_FUTURE, DYNAMIC_NORM, FAIL,
INVALID_REG, MAINTENANCE, NOT_RESPONDING, PERFCTRS, PLANNED, and RESERVED.
By default, any of these states, if idle for SuspendTime, would be powered down.
.IP

Expand Down
15 changes: 15 additions & 0 deletions src/common/slurm_protocol_defs.c
Expand Up @@ -3789,6 +3789,21 @@ extern char *node_state_string_complete(uint32_t state)
return state_str;
}

/*
 * parse_node_state_flag - translate a node state flag name into its flag value
 * IN flag_str - flag name to look up (compared case-insensitively)
 * RET flag of the first node_state_flags entry that matches, or 0 if flag_str
 *	is NULL, empty, or matches no entry
 *
 * Only as many characters as the shorter of flag_str and the table entry's
 * name are compared, so an abbreviation of a flag name matches, and so does a
 * longer word that begins with a flag name.
 */
extern uint32_t parse_node_state_flag(char *flag_str)
{
	size_t flag_str_len;

	/*
	 * strlen(NULL) is undefined, and an empty string would yield a
	 * zero-length comparison that matches the very first table entry.
	 */
	if (!flag_str || !flag_str[0])
		return 0; /* No flags */

	flag_str_len = strlen(flag_str);

	for (int i = 0; i < ARRAY_SIZE(node_state_flags); i++) {
		size_t name_len = strlen(node_state_flags[i].str);

		if (!xstrncasecmp(flag_str, node_state_flags[i].str,
				  MIN(flag_str_len, name_len)))
			return node_state_flags[i].flag;
	}

	return 0; /* No flags */
}

extern char *node_state_string(uint32_t inx)
{
int base = (inx & NODE_STATE_BASE);
Expand Down
5 changes: 5 additions & 0 deletions src/common/slurm_protocol_defs.h
Expand Up @@ -1704,6 +1704,11 @@ extern char *node_state_string_compact(uint32_t inx);
*/
extern char *node_state_string_complete(uint32_t inx);

/*
 * Return first node state flag that matches the string
 *
 * IN flag_str - name of a node state flag; compared case-insensitively
 *	against each entry of the node state flag table, using the length of
 *	whichever of the two strings is shorter
 * RET flag value of the first matching table entry, or 0 if no entry matches
 */
extern uint32_t parse_node_state_flag(char *flag_str);

extern uint16_t power_flags_id(const char *power_flags);
extern char *power_flags_str(uint16_t power_flags);

Expand Down
46 changes: 31 additions & 15 deletions src/slurmctld/power_save.c
Expand Up @@ -110,7 +110,8 @@ List partial_node_list;
bitstr_t *exc_node_bitmap = NULL;

/* Possible SuspendExcStates */
static bool suspend_exc_down, suspend_exc_drain, suspend_exc_planned;
static bool suspend_exc_down;
static uint32_t suspend_exc_state_flags;

int suspend_cnt, resume_cnt;
float suspend_cnt_f, resume_cnt_f;
Expand Down Expand Up @@ -188,20 +189,38 @@ static int _list_part_node_lists(void *x, void *arg)
/*
 * Parse the comma-separated exc_states string (SuspendExcStates) and record
 * which node states are not to be powered down automatically:
 * suspend_exc_down for the DOWN base state, suspend_exc_state_flags for node
 * state flags. Tokens that match neither are reported with error().
 *
 * NOTE(review): this listing appears to be a rendered diff hunk in which lines
 * of the previous implementation (tok_len and the Down/Drained/Planned
 * if/else chain) are interleaved with the new ones -- confirm against the
 * actual file before building.
 */
static void _parse_exc_states(void)
{
char *buf, *tok, *saveptr;
int tok_len;
/* Flags in _node_state_suspendable() are already excluded */
/*
 * Only flags in this mask are accepted from the configuration.
 * NOTE(review): NODE_STATE_NET presumably corresponds to the PERFCTRS name
 * listed in the documentation -- confirm.
 */
uint32_t excludable_state_flags = NODE_STATE_CLOUD |
NODE_STATE_DRAIN |
NODE_STATE_DYNAMIC_FUTURE |
NODE_STATE_DYNAMIC_NORM |
NODE_STATE_FAIL |
NODE_STATE_INVALID_REG |
NODE_STATE_MAINT |
NODE_STATE_NET |
NODE_STATE_NO_RESPOND |
NODE_STATE_PLANNED |
NODE_STATE_RES;

/* strtok_r() modifies the buffer it scans, so tokenize a private copy */
buf = xstrdup(exc_states);
for (tok = strtok_r(buf, ",", &saveptr); tok;
tok = strtok_r(NULL, ",", &saveptr)) {
tok_len = strlen(tok);
if (!xstrncasecmp(tok, "Down", MAX(tok_len, 2)))
uint32_t flag = 0;

/* Base node states */
/*
 * The comparison length is at least the token's full length and at least 2,
 * so (assuming xstrncasecmp follows strncasecmp semantics) the token must be
 * "DOWN" or an abbreviation of it that is two or more characters long.
 */
if (!xstrncasecmp(tok, "DOWN", MAX(strlen(tok), 2))){
suspend_exc_down = true;
else if (!xstrncasecmp(tok, "Drained", MAX(tok_len, 2)))
suspend_exc_drain = true;
else if (!xstrncasecmp(tok, "Planned", MAX(tok_len, 1)))
suspend_exc_planned = true;
else
error("Invalid SuspendExcState %s", tok);
continue;
}

/* Flag node states */
/* Accept the token only if the flag it names is in the excludable mask */
flag = parse_node_state_flag(tok);
if (flag & excludable_state_flags) {
suspend_exc_state_flags |= flag;
continue;
}

/* Neither DOWN nor an excludable flag */
error("Invalid SuspendExcState %s", tok);
}
xfree(buf);
}
Expand Down Expand Up @@ -235,9 +254,7 @@ static bool _node_state_should_suspend(node_record_t *node_ptr)
/* SuspendExcStates */
if (suspend_exc_down && IS_NODE_DOWN(node_ptr))
return false;
if (suspend_exc_drain && IS_NODE_DRAIN(node_ptr))
return false;
if (suspend_exc_planned && (node_ptr->node_state & NODE_STATE_PLANNED))
if (suspend_exc_state_flags & node_ptr->node_state)
return false;

return true;
Expand Down Expand Up @@ -701,8 +718,7 @@ static void _clear_power_config(void)
xfree(exc_parts);
xfree(exc_states);
suspend_exc_down = false;
suspend_exc_drain = false;
suspend_exc_planned = false;
suspend_exc_state_flags = 0;
FREE_NULL_BITMAP(exc_node_bitmap);
FREE_NULL_LIST(partial_node_list);
}
Expand Down

0 comments on commit 5f2cba5

Please sign in to comment.