Skip to content

Commit

Permalink
Fix UserCPU, SystemCPU, and TotalCPU with jobacct_gather/cgroup
Browse files Browse the repository at this point in the history
This patch fixes the regression caused by 0748356 where only the
linux plugin was handling this scenario correctly.

Bug 6332
  • Loading branch information
hintron authored and dannyauble committed Jan 15, 2019
1 parent 2b5f1a7 commit 8eb11cf
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 7 deletions.
2 changes: 2 additions & 0 deletions NEWS
Expand Up @@ -37,6 +37,8 @@ documents those changes that are of interest to users and administrators.
-- Suppress connection errors to primary slurmdbd when backup dbd is active.
-- Suppress connection errors to primary db when backup db kicks in
-- Add missing fields for sacct --completion when using jobcomp/filetxt.
-- Fix incorrect values set for UserCPU, SystemCPU, and TotalCPU sacct fields
when JobAcctGatherType=jobacct_gather/cgroup.

* Changes in Slurm 18.08.4
==========================
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c
Expand Up @@ -144,6 +144,10 @@ static void _prec_extra(jag_prec_t *prec, uint32_t taskid)
__func__, prec->pid, prec->ppid);
} else {
sscanf(cpu_time, "%*s %lu %*s %lu", &utime, &stime);
/*
* Store unnormalized times; we will normalize them when
* transferring to a struct jobacctinfo in jag_common_poll_data()
*/
prec->usec = utime;
prec->ssec = stime;
}
Expand Down
14 changes: 9 additions & 5 deletions src/plugins/jobacct_gather/common/common_jag.c
Expand Up @@ -365,8 +365,12 @@ static int _get_process_data_line(int in, jag_prec_t *prec) {
prec->tres_data[TRES_ARRAY_VMEM].size_read = vsize;
prec->tres_data[TRES_ARRAY_MEM].size_read = rss * my_pagesize;

prec->usec = (double)utime/(double)hertz;
prec->ssec = (double)stime/(double)hertz;
/*
* Store unnormalized times; we will normalize them when
* transferring to a struct jobacctinfo in jag_common_poll_data()
*/
prec->usec = (double)utime;
prec->ssec = (double)stime;
prec->last_cpu = last_cpu;
return 1;
}
Expand Down Expand Up @@ -998,7 +1002,7 @@ extern void jag_common_poll_data(
last_total_cputime =
(double)jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

cpu_calc = prec->ssec + prec->usec;
cpu_calc = (prec->ssec + prec->usec) / (double)hertz;

/*
* Since we are not storing things as a double anymore make it
Expand Down Expand Up @@ -1072,8 +1076,8 @@ extern void jag_common_poll_data(
total_job_vsize += jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM];

/* Update the cpu times */
jobacct->user_cpu_sec = (uint32_t)prec->usec;
jobacct->sys_cpu_sec = (uint32_t)prec->ssec;
jobacct->user_cpu_sec = (uint32_t)(prec->usec / (double)hertz);
jobacct->sys_cpu_sec = (uint32_t)(prec->ssec / (double)hertz);

/* compute frequency */
jobacct->this_sampled_cputime =
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/jobacct_gather/common/common_jag.h
Expand Up @@ -48,11 +48,11 @@ typedef struct jag_prec { /* process record */
int last_cpu; /* last cpu */
pid_t pid;
pid_t ppid;
double ssec; /* system cpu time */
double ssec; /* system cpu time: To normalize divide by system hertz */
/* Units of tres_[in|out] should be raw numbers (bytes/joules) */
int tres_count; /* count of tres in the tres_data */
acct_gather_data_t *tres_data; /* array of tres data */
double usec; /* user cpu time */
double usec; /* user cpu time: To normalize divide by system hertz */
} jag_prec_t;

typedef struct jag_callbacks {
Expand Down

0 comments on commit 8eb11cf

Please sign in to comment.