-
Notifications
You must be signed in to change notification settings - Fork 629
/
job_resources.h
306 lines (277 loc) · 13.2 KB
/
job_resources.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
/*****************************************************************************\
* job_resources.h - functions to manage data structure identifying specific
* CPUs allocated to a job, step or partition
*****************************************************************************
* Copyright (C) 2008 Lawrence Livermore National Security.
* Written by Morris Jette <jette1@llnl.gov>.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _JOB_RESOURCES_H
#define _JOB_RESOURCES_H
#include <inttypes.h>
#include "src/common/bitstring.h"
#include "src/common/pack.h"
#include "src/slurmctld/slurmctld.h"
/* struct job_resources defines exactly which resources are allocated
* to a job, step, partition, etc.
*
* core_bitmap - Bitmap of allocated cores for all nodes and sockets.
* The bitmap reflects allocated resources only on the
* allocated nodes, not the full system resources.
* core_bitmap_used - Bitmap of cores allocated to job steps (see above)
 * cores_per_socket - Count of cores per socket on this node, built by
 * build_job_resources() to ensure consistent
 * interpretation of core_bitmap
* cpus - Count of desired/allocated CPUs per node for job/step
* cpus_used - For a job, count of CPUs per node used by job steps
* cpu_array_cnt - Count of elements in cpu_array_* below
* cpu_array_value - Count of allocated CPUs per node for job
* cpu_array_reps - Number of consecutive nodes on which cpu_array_value
* is duplicated. See NOTES below.
* memory_allocated - MB per node reserved for the job or step
* memory_used - MB per node of memory consumed by job steps
* nhosts - Number of nodes in the allocation. On a
* bluegene machine this represents the number
* of midplanes used. This should always be
* the number of bits set in node_bitmap.
* node_bitmap - Bitmap of nodes allocated to the job. Unlike the
* node_bitmap in slurmctld's job record, the bits
* here do NOT get cleared as the job completes on a
* node
* node_req - NODE_CR_RESERVED|NODE_CR_ONE_ROW|NODE_CR_AVAILABLE
* nodes - Names of nodes in original job allocation
* ncpus - Number of processors in the allocation
 * sock_core_rep_count - How many consecutive nodes sockets_per_node
 * and cores_per_socket apply to, built by
 * build_job_resources() to ensure consistent
 * interpretation of core_bitmap
 * sockets_per_node - Count of sockets on this node, built by
 * build_job_resources() to ensure consistent
 * interpretation of core_bitmap
* tasks_per_node - Expected tasks to launch per node. Currently used only
* by cons_tres for tres_per_task support at resource
* allocation time. No need to save/restore or pack.
* whole_node - Job allocated full node (used only by select/cons_tres)
*
* NOTES:
* cpu_array_* contains the same information as "cpus", but in a more compact
* format. For example if cpus = {4, 4, 2, 2, 2, 2, 2, 2} then cpu_array_cnt=2
* cpu_array_value = {4, 2} and cpu_array_reps = {2, 6}. We do not need to
* save/restore these values, but generate them by calling
* build_job_resources_cpu_array()
*
* Sample layout of core_bitmap:
* | Node_0 | Node_1 |
* | Sock_0 | Sock_1 | Sock_0 | Sock_1 |
* | Core_0 | Core_1 | Core_0 | Core_1 | Core_0 | Core_1 | Core_0 | Core_1 |
* | Bit_0 | Bit_1 | Bit_2 | Bit_3 | Bit_4 | Bit_5 | Bit_6 | Bit_7 |
*
* If a job changes size (relinquishes nodes), the node_bitmap will remain
 * unchanged, but cpus, cpus_used, cpu_array_*, and memory_used will be
 * updated (e.g. cpus and memory_used on that node cleared).
*/
struct job_resources {
	bitstr_t *core_bitmap;		/* allocated cores, covering only the
					 * allocated nodes (see sample layout
					 * in the comment above) */
	bitstr_t *core_bitmap_used;	/* cores in use by job steps */
	uint32_t cpu_array_cnt;		/* elements in cpu_array_* below */
	uint16_t *cpu_array_value;	/* allocated CPUs per node */
	uint32_t *cpu_array_reps;	/* consecutive nodes on which the
					 * matching cpu_array_value repeats */
	uint16_t *cpus;			/* desired/allocated CPUs per node */
	uint16_t *cpus_used;		/* CPUs per node used by job steps */
	uint16_t *cores_per_socket;	/* cores per socket, per node */
	uint16_t cr_type;		/* NOTE(review): presumably CR_*
					 * consumable-resource flags; not
					 * described in the comment above --
					 * confirm against the select plugin */
	uint64_t *memory_allocated;	/* MB per node reserved for job/step */
	uint64_t *memory_used;		/* MB per node used by job steps */
	uint32_t nhosts;		/* node count in allocation; always the
					 * number of bits set in node_bitmap */
	bitstr_t *node_bitmap;		/* allocated nodes; bits are NOT
					 * cleared as the job completes on a
					 * node (unlike slurmctld's job
					 * record) */
	uint32_t node_req;		/* NODE_CR_RESERVED | NODE_CR_ONE_ROW |
					 * NODE_CR_AVAILABLE */
	char *nodes;			/* names of nodes in original job
					 * allocation */
	uint32_t ncpus;			/* number of processors in allocation */
	uint32_t *sock_core_rep_count;	/* consecutive nodes to which the
					 * matching sockets_per_node and
					 * cores_per_socket entries apply */
	uint16_t *sockets_per_node;	/* sockets per node */
	uint16_t *tasks_per_node;	/* expected tasks to launch per node;
					 * used only by cons_tres, not
					 * saved/restored or packed */
	uint16_t threads_per_core;	/* NOTE(review): threads per core --
					 * not described in the comment above,
					 * confirm intended semantics */
	uint8_t whole_node;		/* job allocated full node (used only
					 * by select/cons_tres) */
};
/*
* node_res_record.node_state assists with the unique state of each node.
* When a job is allocated, these flags provide protection for nodes in a
* OverSubscribe=NO or OverSubscribe=EXCLUSIVE partition from other jobs.
*
* NOTES:
* - If node is in use by OverSubscribe=NO part, some CPUs/memory may be
* available.
* - Caution with NODE_CR_AVAILABLE: a Sharing partition could be full.
*
* - these values are staggered so that they can be incremented as multiple
* jobs are allocated to each node. This is needed to be able to support
* preemption, which can override these protections.
*/
/* Per-node allocation state; values are deliberately staggered (see NOTES
 * above) so multiple jobs per node can increment them, which is needed to
 * support preemption. */
enum node_cr_state {
	NODE_CR_AVAILABLE = 0,	/* node may be IDLE or IN USE (shared) */
	NODE_CR_ONE_ROW = 1,	/* in use by an OverSubscribe=NO partition */
	NODE_CR_RESERVED = 64000 /* in use by an OverSubscribe=EXCLUSIVE
				  * partition */
};
/* Create an empty job_resources data structure, just a call to xmalloc() */
extern job_resources_t *create_job_resources(void);
/* Set the socket and core counts associated with a set of selected
* nodes of a job_resources data structure based upon slurmctld state.
* (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based
* upon the value of node_bitmap, also creates core_bitmap based upon
* the total number of cores in the allocation). Call this ONLY from
* slurmctld. Example of use:
*
* job_resources_t *job_resrcs_ptr = create_job_resources();
* node_name2bitmap("dummy[2,5,12,16]", true, &(job_res_ptr->node_bitmap));
* rc = build_job_resources(job_resrcs_ptr);
*/
extern int build_job_resources(job_resources_t *job_resrcs_ptr);
/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the
* values of cpus in an existing data structure
* Return total CPU count or -1 on error */
extern int build_job_resources_cpu_array(job_resources_t *job_resrcs_ptr);
/* Validate a job_resources data structure originally built using
* build_job_resources() is still valid based upon slurmctld state.
* NOTE: Reset the node_bitmap field before calling this function.
* If the sockets_per_node or cores_per_socket for any node in the allocation
* changes, then return SLURM_ERROR. Otherwise return SLURM_SUCCESS. Any
 * change in a node's socket or core count requires that any job running on
* that node be killed. Example of use:
*
* rc = valid_job_resources(job_resrcs_ptr);
*/
extern int valid_job_resources(job_resources_t *job_resrcs_ptr);
/* Make a copy of a job_resources data structure,
* free using free_job_resources() */
extern job_resources_t *copy_job_resources(job_resources_t *job_resrcs_ptr);
/* Free job_resources data structure created using copy_job_resources() or
* unpack_job_resources() */
extern void free_job_resources(job_resources_t **job_resrcs_pptr);
/* Log the contents of a job_resources data structure using info() */
extern void log_job_resources(void *job_ptr);
/* Un/pack full job_resources data structure */
extern void pack_job_resources(job_resources_t *job_resrcs_ptr, buf_t *buffer,
uint16_t protocol_version);
extern int unpack_job_resources(job_resources_t **job_resrcs_pptr,
buf_t *buffer, uint16_t protocol_version);
/* Reset the node_bitmap in a job_resources data structure
* This is needed after a restart/reconfiguration since nodes can
 * be added or removed from the system, resulting in changes to
 * the bitmap size or bit positions */
extern int reset_node_bitmap(void *job_ptr);
/* For a given node_id, socket_id and core_id, get its offset within
 * the core bitmap */
extern int get_job_resources_offset(job_resources_t *job_resrcs_ptr,
uint32_t node_id, uint16_t socket_id,
uint16_t core_id);
/* Get/set bit value at specified location.
* node_id, socket_id and core_id are all zero origin */
extern int get_job_resources_bit(job_resources_t *job_resrcs_ptr,
uint32_t node_id, uint16_t socket_id,
uint16_t core_id);
extern int set_job_resources_bit(job_resources_t *job_resrcs_ptr,
uint32_t node_id, uint16_t socket_id,
uint16_t core_id);
/* For every core bitmap set in the "from" resources structure at
* from_node_offset, set the corresponding bit in the "new" resources structure
* at new_node_offset */
extern int job_resources_bits_copy(job_resources_t *new_job_resrcs_ptr,
uint16_t new_node_offset,
job_resources_t *from_job_resrcs_ptr,
uint16_t from_node_offset);
/*
* AND two job_resources structures.
* Every node/core set in job_resrcs1_ptr and job_resrcs2_ptr is set in the
* resulting job_resrcs1_ptr data structure
* RET SLURM_SUCCESS or an error code
*/
extern int job_resources_and(job_resources_t *job_resrcs1_ptr,
job_resources_t *job_resrcs2_ptr);
/*
* OR two job_resources structures.
* Every node/core set in job_resrcs1_ptr or job_resrcs2_ptr is set in the
* resulting job_resrcs1_ptr data structure
* RET SLURM_SUCCESS or an error code
*/
extern int job_resources_or(job_resources_t *job_resrcs1_ptr,
job_resources_t *job_resrcs2_ptr);
/* Get/clear/set bit value at specified location for whole node allocations
* get is for any socket/core on the specified node
* set is for all sockets/cores on the specified node
* fully compatible with set/get_job_resources_bit()
* node_id is all zero origin */
extern int get_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
extern int clear_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
extern int set_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
/* Completely remove specified node from job resources structure */
extern int extract_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
/* Return the count of core bitmaps set for the specific node */
extern int count_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
/* Return a copy of core_bitmap only for the specific node */
extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
/* Get socket and core count for a specific node_id (zero origin) */
extern int get_job_resources_cnt(job_resources_t *job_resrcs_ptr,
uint32_t node_id, uint16_t *socket_cnt,
uint16_t *cores_per_socket_cnt);
/* Get CPU count for a specific node_id (zero origin), return -1 on error */
extern int get_job_resources_cpus(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
/*
* Test if job can fit into the given full-length core_bitmap
* IN job_resrcs_ptr - resources allocated to a job
* IN full_bitmap - bitmap of available CPUs
* RET 1 on success, 0 otherwise
*/
extern int job_fits_into_cores(job_resources_t *job_resrcs_ptr,
bitstr_t *full_bitmap);
/*
* Add job to full-length core_bitmap
* IN job_resrcs_ptr - resources allocated to a job
 * IN/OUT full_core_bitmap - bitmap of allocated cores, allocated/resized
 * as needed (no return value; the function is void)
 */
extern void add_job_to_cores(job_resources_t *job_resrcs_ptr,
bitstr_t **full_core_bitmap);
/* Given a job pointer and a global node index, return the index of that
* node in the job_resrcs_ptr->cpus. Return -1 if invalid */
extern int job_resources_node_inx_to_cpu_inx(job_resources_t *job_resrcs_ptr,
int node_inx);
extern uint16_t job_resources_get_node_cpu_cnt(job_resources_t *job_resrcs_ptr,
int job_node_inx,
int sys_node_inx);
#endif /* !_JOB_RESOURCES_H */