Fix jobacct_gather/cgroup to work correctly when more than one task is started on a node.

Bug 5865
MarshallGarey authored and dannyauble committed Nov 21, 2018
1 parent ae89960 commit 5847bd7
Showing 9 changed files with 165 additions and 36 deletions.
2 changes: 2 additions & 0 deletions NEWS
@@ -52,6 +52,8 @@ documents those changes that are of interest to users and administrators.
-- Accept modifiers for TRES originally added in 6f0342e0358.
-- When doing a reconfigure handle QOS' GrpJobsAccrue correctly.
-- Remove unneeded extra parentheses from sh5util.
+ -- Fix jobacct_gather/cgroup to work correctly when more than one task is
+    started on a node.

* Changes in Slurm 18.08.3
==========================
8 changes: 6 additions & 2 deletions src/common/slurm_jobacct_gather.c
@@ -172,7 +172,7 @@ static void _init_tres_usage(struct jobacctinfo *jobacct,
jobacct->tres_usage_out_min[i] = INFINITE64;
jobacct->tres_usage_out_tot[i] = INFINITE64;

- if (jobacct_id && jobacct_id->taskid != NO_VAL16) {
+ if (jobacct_id && jobacct_id->taskid != NO_VAL) {
jobacct->tres_usage_in_max_taskid[i] =
(uint64_t) jobacct_id->taskid;
jobacct->tres_usage_in_min_taskid[i] =
@@ -320,6 +320,10 @@ static void _pack_jobacct_id(jobacct_id_t *jobacct_id,
pack16((uint16_t) jobacct_id->taskid, buffer);
} else {
pack32(NO_VAL, buffer);
+ /*
+  * This pack is not used in modern code, so leave
+  * it NO_VAL16.
+  */
pack16(NO_VAL16, buffer);
}
}
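For context, the record written by _pack_jobacct_id() stays wire compatible: a 32-bit value for current readers, followed by the legacy 16-bit task id slot. A minimal sketch of what a matching reader could look like, assuming the safe_unpack32()/safe_unpack16() macros from src/common/pack.h; this is illustrative only and not part of the commit:

/*
 * Hypothetical reader sketch (not in this commit). Assumes the
 * safe_unpack*() macros from src/common/pack.h, which jump to
 * unpack_error on a short buffer.
 */
static int _unpack_jobacct_id(jobacct_id_t *jobacct_id, Buf buffer)
{
	uint16_t taskid16;

	safe_unpack32(&jobacct_id->nodeid, buffer);
	safe_unpack16(&taskid16, buffer);
	/* Widen the legacy 16-bit task id into the new 32-bit field. */
	jobacct_id->taskid = (taskid16 == NO_VAL16) ?
		NO_VAL : (uint32_t) taskid16;
	return SLURM_SUCCESS;

unpack_error:
	return SLURM_ERROR;
}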
@@ -904,7 +908,7 @@ extern jobacctinfo_t *jobacctinfo_create(jobacct_id_t *jobacct_id)
jobacct = xmalloc(sizeof(struct jobacctinfo));

if (!jobacct_id) {
- temp_id.taskid = NO_VAL16;
+ temp_id.taskid = NO_VAL;
temp_id.nodeid = NO_VAL;
jobacct_id = &temp_id;
}
2 changes: 1 addition & 1 deletion src/common/slurm_jobacct_gather.h
@@ -73,7 +73,7 @@
#define CPU_TIME_ADJ 1000

typedef struct {
- uint16_t taskid; /* contains which task number it was on */
+ uint32_t taskid; /* contains which task number it was on */
uint32_t nodeid; /* contains which node number it was on */
stepd_step_rec_t *job; /* contains stepd job pointer */
} jobacct_id_t;
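The widening matters because a uint16_t caps usable task ids well below what large steps can reach, and the old sentinel no longer fits the new field. For reference, the sentinels involved are defined in slurm.h as follows (values quoted from the Slurm headers, shown here only for context):

/* Sentinel values, per slurm.h: */
#define NO_VAL16 (0xfffe)      /* "unset" for 16-bit fields */
#define NO_VAL   (0xfffffffe)  /* "unset" for 32-bit fields */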
55 changes: 52 additions & 3 deletions src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c
@@ -99,17 +99,45 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER;
/* Other useful declarations */
static slurm_cgroup_conf_t slurm_cgroup_conf;

- static void _prec_extra(jag_prec_t *prec)
+ static void _prec_extra(jag_prec_t *prec, uint32_t taskid)
{
unsigned long utime, stime, total_rss, total_pgpgin;
char *cpu_time = NULL, *memory_stat = NULL, *ptr;
size_t cpu_time_size = 0, memory_stat_size = 0;
+ xcgroup_t *task_cpuacct_cg = NULL;
+ xcgroup_t *task_memory_cg = NULL;
+ bool exit_early = false;

+ /* Find which task cgroups to use */
+ task_memory_cg = list_find_first(task_memory_cg_list,
+ find_task_cg_info,
+ &taskid);
+ task_cpuacct_cg = list_find_first(task_cpuacct_cg_list,
+ find_task_cg_info,
+ &taskid);
+
+ /*
+  * We should always find the task cgroups; if we don't for some reason,
+  * just print an error and return.
+  */
+ if (!task_cpuacct_cg) {
+ error("%s: Could not find task_cpuacct_cg, this should never happen",
+ __func__);
+ exit_early = true;
+ }
+ if (!task_memory_cg) {
+ error("%s: Could not find task_memory_cg, this should never happen",
+ __func__);
+ exit_early = true;
+ }
+ if (exit_early)
+ return;

//DEF_TIMERS;
//START_TIMER;
/* info("before"); */
/* print_jag_prec(prec); */
- xcgroup_get_param(&task_cpuacct_cg, "cpuacct.stat",
+ xcgroup_get_param(task_cpuacct_cg, "cpuacct.stat",
&cpu_time, &cpu_time_size);
if (cpu_time == NULL) {
debug2("%s: failed to collect cpuacct.stat pid %d ppid %d",
@@ -120,7 +148,7 @@ static void _prec_extra(jag_prec_t *prec)
prec->ssec = stime;
}

- xcgroup_get_param(&task_memory_cg, "memory.stat",
+ xcgroup_get_param(task_memory_cg, "memory.stat",
&memory_stat, &memory_stat_size);
if (memory_stat == NULL) {
debug2("%s: failed to collect memory.stat pid %d ppid %d",
@@ -363,3 +391,24 @@ extern char* jobacct_cgroup_create_slurm_cg(xcgroup_ns_t* ns)

return pre;
}

+ extern int find_task_cg_info(void *x, void *key)
+ {
+ task_cg_info_t *task_cg = (task_cg_info_t*)x;
+ uint32_t taskid = *(uint32_t*)key;
+
+ if (task_cg->taskid == taskid)
+ return 1;
+
+ return 0;
+ }
+
+ extern void free_task_cg_info(void *object)
+ {
+ task_cg_info_t *task_cg = (task_cg_info_t *)object;
+
+ if (task_cg) {
+ xcgroup_destroy(&task_cg->task_cg);
+ xfree(task_cg);
+ }
+ }
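These two additions are the matcher and destructor callbacks for Slurm's List API: list_create() takes the destructor, list_find_first() takes the matcher. A hypothetical snippet (not part of the commit) tying them together:

/*
 * Illustrative only: exercising the matcher/destructor pair with
 * Slurm's List API (src/common/list.h) and xmalloc/xfree. xmalloc
 * zero-fills, so destroying the empty embedded xcgroup is safe.
 */
static void _task_cg_list_demo(void)
{
	List cgs = list_create(free_task_cg_info); /* destructor */
	task_cg_info_t *info = xmalloc(sizeof(*info));
	task_cg_info_t *hit;
	uint32_t want = 3;

	info->taskid = 3;
	list_append(cgs, info);

	/* Matcher returns nonzero for the entry whose taskid matches. */
	hit = list_find_first(cgs, find_task_cg_info, &want);
	if (hit)
		debug("found task cgroup for task %u", hit->taskid);

	FREE_NULL_LIST(cgs); /* runs free_task_cg_info on each entry */
}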
17 changes: 15 additions & 2 deletions src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h
@@ -43,8 +43,17 @@
#include "src/common/xcgroup_read_config.h"
#include "src/slurmd/common/xcgroup.h"

- extern xcgroup_t task_memory_cg;
- extern xcgroup_t task_cpuacct_cg;
+ /*
+  * There are potentially multiple tasks on a node, so we want to
+  * track every task cgroup and which taskid it belongs to.
+  */
+ typedef struct task_cg_info {
+ xcgroup_t task_cg;
+ uint32_t taskid;
+ } task_cg_info_t;
+
+ extern List task_memory_cg_list;
+ extern List task_cpuacct_cg_list;

extern int jobacct_gather_cgroup_cpuacct_init(
slurm_cgroup_conf_t *slurm_cgroup_conf);
@@ -76,3 +85,7 @@ extern int jobacct_gather_cgroup_memory_attach_task(
/* pid_t pid, jobacct_id_t *jobacct_id); */

extern char* jobacct_cgroup_create_slurm_cg (xcgroup_ns_t* ns);

+ extern int find_task_cg_info(void *x, void *key);

+ extern void free_task_cg_info(void *task_cg);
46 changes: 37 additions & 9 deletions src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c
@@ -57,7 +57,8 @@ static xcgroup_ns_t cpuacct_ns;
static xcgroup_t user_cpuacct_cg;
static xcgroup_t job_cpuacct_cg;
static xcgroup_t step_cpuacct_cg;
- xcgroup_t task_cpuacct_cg;
+
+ List task_cpuacct_cg_list = NULL;

static uint32_t max_task_id;

@@ -68,6 +69,7 @@ jobacct_gather_cgroup_cpuacct_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
user_cgroup_path[0]='\0';
job_cgroup_path[0]='\0';
jobstep_cgroup_path[0]='\0';
+ task_cgroup_path[0]='\0';

/* initialize cpuacct cgroup namespace */
if (xcgroup_ns_create(slurm_cgroup_conf, &cpuacct_ns, "", "cpuacct")
@@ -77,6 +79,9 @@ jobacct_gather_cgroup_cpuacct_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
return SLURM_ERROR;
}

+ FREE_NULL_LIST(task_cpuacct_cg_list);
+ task_cpuacct_cg_list = list_create(free_task_cg_info);

return SLURM_SUCCESS;
}

@@ -90,7 +95,7 @@ jobacct_gather_cgroup_cpuacct_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
if (user_cgroup_path[0] == '\0'
|| job_cgroup_path[0] == '\0'
|| jobstep_cgroup_path[0] == '\0'
- || task_cgroup_path[0] == 0)
+ || task_cgroup_path[0] == '\0')
return SLURM_SUCCESS;

/*
@@ -151,16 +156,16 @@ jobacct_gather_cgroup_cpuacct_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
if (lock_ok == true)
xcgroup_unlock(&cpuacct_cg);

- xcgroup_destroy(&task_cpuacct_cg);
xcgroup_destroy(&user_cpuacct_cg);
xcgroup_destroy(&job_cpuacct_cg);
xcgroup_destroy(&step_cpuacct_cg);
xcgroup_destroy(&cpuacct_cg);
+ FREE_NULL_LIST(task_cpuacct_cg_list);

user_cgroup_path[0]='\0';
job_cgroup_path[0]='\0';
jobstep_cgroup_path[0]='\0';
- task_cgroup_path[0] = 0;
+ task_cgroup_path[0]='\0';

xcgroup_ns_destroy(&cpuacct_ns);

@@ -180,6 +185,8 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
int fstatus = SLURM_SUCCESS;
int rc;
char* slurm_cgpath;
+ task_cg_info_t *task_cg_info;
+ bool need_to_add = false;

job = jobacct_id->job;
uid = job->uid;
@@ -191,6 +198,8 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
if (taskid >= max_task_id)
max_task_id = taskid;

+ xassert(task_cpuacct_cg_list);

debug("%s: jobid %u stepid %u taskid %u max_task_id %u",
__func__, jobid, stepid, taskid, max_task_id);

@@ -210,6 +219,7 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
return SLURM_ERROR;
}
}
+ xfree(slurm_cgpath);

/* build job cgroup relative path if not set (may not be) */
if (*job_cgroup_path == '\0') {
@@ -344,26 +354,40 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
goto error;
}

+ if (!(task_cg_info = list_find_first(task_cpuacct_cg_list,
+ find_task_cg_info,
+ &taskid))) {
+ task_cg_info = xmalloc(sizeof(*task_cg_info));
+ task_cg_info->taskid = taskid;
+ need_to_add = true;
+ }

/*
* Create task cgroup in the cpuacct ns
*/
- if (xcgroup_create(&cpuacct_ns, &task_cpuacct_cg,
+ if (xcgroup_create(&cpuacct_ns, &task_cg_info->task_cg,
task_cgroup_path,
uid, gid) != XCGROUP_SUCCESS) {
/* do not delete user/job cgroup as they can exist for other
* steps, but release cgroup structures */
xcgroup_destroy(&user_cpuacct_cg);
xcgroup_destroy(&job_cpuacct_cg);

+ /* Don't use free_task_cg_info as the task_cg isn't there */
+ xfree(task_cg_info);

error("jobacct_gather/cgroup: unable to create jobstep %u.%u "
"task %u cpuacct cgroup", jobid, stepid, taskid);
fstatus = SLURM_ERROR;
goto error;
}

- if (xcgroup_instantiate(&task_cpuacct_cg) != XCGROUP_SUCCESS) {
+ if (xcgroup_instantiate(&task_cg_info->task_cg)
+ != XCGROUP_SUCCESS) {
xcgroup_destroy(&user_cpuacct_cg);
xcgroup_destroy(&job_cpuacct_cg);
xcgroup_destroy(&step_cpuacct_cg);
+ free_task_cg_info(task_cg_info);
error("jobacct_gather/cgroup: unable to instantiate jobstep "
"%u.%u task %u cpuacct cgroup", jobid, stepid, taskid);
fstatus = SLURM_ERROR;
Expand All @@ -373,14 +397,18 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
/*
* Attach the slurmstepd to the task cpuacct cgroup
*/
- rc = xcgroup_add_pids(&task_cpuacct_cg, &pid, 1);
+ rc = xcgroup_add_pids(&task_cg_info->task_cg, &pid, 1);
if (rc != XCGROUP_SUCCESS) {
error("jobacct_gather/cgroup: unable to add slurmstepd to "
"cpuacct cg '%s'", task_cpuacct_cg.path);
error("jobacct_gather/cgroup: unable to add slurmstepd to cpuacct cg '%s'",
task_cg_info->task_cg.path);
fstatus = SLURM_ERROR;
} else
fstatus = SLURM_SUCCESS;

+ /* Add the task cgroup to the list now that it is initialized. */
+ if (need_to_add)
+ list_append(task_cpuacct_cg_list, task_cg_info);

error:
xcgroup_unlock(&cpuacct_cg);
xcgroup_destroy(&cpuacct_cg);
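Taken together, the attach path now follows a find-or-create pattern per task instead of reusing one global task cgroup. A condensed, hypothetical restatement of the flow above (error handling elided; not literal code from the commit):

/* Condensed sketch of the per-task attach flow (error paths elided). */
task_cg_info_t *tci = list_find_first(task_cpuacct_cg_list,
				      find_task_cg_info, &taskid);
bool need_to_add = false;

if (!tci) {                     /* first pid seen for this task */
	tci = xmalloc(sizeof(*tci));
	tci->taskid = taskid;
	need_to_add = true;
}
xcgroup_create(&cpuacct_ns, &tci->task_cg, task_cgroup_path, uid, gid);
xcgroup_instantiate(&tci->task_cg);
xcgroup_add_pids(&tci->task_cg, &pid, 1);
if (need_to_add)                /* publish only after successful setup */
	list_append(task_cpuacct_cg_list, tci);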
