From f4ebc793d015259e13310d76f35e0797355b6bbd Mon Sep 17 00:00:00 2001 From: Alejandro Sanchez Date: Wed, 18 May 2016 13:16:41 +0200 Subject: [PATCH] Fix MemSpecLimit to depend on TaskPlugin=task/cgroup and ConstrainRAMSpace. Bug #2713. --- NEWS | 2 ++ doc/man/man5/slurm.conf.5 | 4 +++- src/slurmd/common/slurmd_cgroup.c | 18 +++++++++++++++++- src/slurmd/common/slurmd_cgroup.h | 7 +++++-- src/slurmd/slurmd/slurmd.c | 12 ++++++++++-- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index f8794a82570..3545a7d0693 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,8 @@ documents those changes that are of interest to users and administrators. -- Fix issues when building on NetBSD. -- Fix jobcomp/elasticsearch build when libcurl is installed in a non-standard location. + -- Fix MemSpecLimit to explicitly require TaskPlugin=task/cgroup and + ConstrainRAMSpace set in cgroup.conf. * Changes in Slurm 15.08.11 =========================== diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 516bd636d32..b90d799c51a 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -3761,7 +3761,9 @@ Also see \fBFeature\fR. Limit on combined real memory allocation for compute node daemons (slurmd, slurmstepd), in megabytes. This memory is not available to job allocations. The daemons won't be killed when they exhaust the memory allocation -(ie. the OOM Killer is disabled for the daemon's memory cgroup). +(i.e. the OOM Killer is disabled for the daemon's memory cgroup). This option has +no effect unless cgroup job confinement is also configured +(\fBTaskPlugin=task/cgroup\fR with \fBConstrainRAMSpace=yes\fR in cgroup.conf). 
.TP \fBPort\fR diff --git a/src/slurmd/common/slurmd_cgroup.c b/src/slurmd/common/slurmd_cgroup.c index 0d9b6dc0921..9283b30d7d0 100644 --- a/src/slurmd/common/slurmd_cgroup.c +++ b/src/slurmd/common/slurmd_cgroup.c @@ -459,7 +459,23 @@ extern int attach_system_memory_pid(pid_t pid) return SLURM_SUCCESS; } -extern bool check_cgroup_job_confinement(void) +extern bool check_memspec_cgroup_job_confinement(void) +{ + char *task_plugin_type = NULL; + bool status = false; + + if (read_slurm_cgroup_conf(&slurm_cgroup_conf)) + return false; + task_plugin_type = slurm_get_task_plugin(); + if (slurm_cgroup_conf.constrain_ram_space && + strstr(task_plugin_type, "cgroup")) + status = true; + xfree(task_plugin_type); + free_slurm_cgroup_conf(&slurm_cgroup_conf); + return status; +} + +extern bool check_corespec_cgroup_job_confinement(void) { char *task_plugin_type = NULL; bool status = FALSE; diff --git a/src/slurmd/common/slurmd_cgroup.h b/src/slurmd/common/slurmd_cgroup.h index 8224cd3fa0b..4b5bbd91b06 100644 --- a/src/slurmd/common/slurmd_cgroup.h +++ b/src/slurmd/common/slurmd_cgroup.h @@ -67,8 +67,11 @@ extern int attach_system_cpuset_pid(pid_t pid); /* Attach a pid to system memory cgroup */ extern int attach_system_memory_pid(pid_t pid); -/* Check that cgroup job confinement is configured */ -extern bool check_cgroup_job_confinement(void); +/* Check that corespec cgroup job confinement is configured */ +extern bool check_corespec_cgroup_job_confinement(void); + +/* Check that memspec cgroup job confinement is configured */ +extern bool check_memspec_cgroup_job_confinement(void); /* Attach a pid to the system cgroups */ extern void attach_system_cgroup_pid(pid_t pid); diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 13763f2d977..6984332477f 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -2082,8 +2082,10 @@ static int _core_spec_init(void) debug("Using core_spec/cray to manage specialized cores"); return SLURM_SUCCESS; 
} - if (!check_cgroup_job_confinement()) { - error("Resource spec: cgroup job confinement not configured"); + if (!check_corespec_cgroup_job_confinement()) { + error("Resource spec: cgroup job confinement not configured. " + "CoreSpec requires TaskPlugin=task/cgroup and " + "ConstrainCores=yes in cgroup.conf"); return SLURM_ERROR; } @@ -2158,6 +2160,12 @@ static int _memory_spec_init(void) "configured for this node"); return SLURM_SUCCESS; } + if (!check_memspec_cgroup_job_confinement()) { + error("Resource spec: cgroup job confinement not configured. " + "MemSpecLimit requires TaskPlugin=task/cgroup and " + "ConstrainRAMSpace=yes in cgroup.conf"); + return SLURM_ERROR; + } if (init_system_memory_cgroup() != SLURM_SUCCESS) { error("Resource spec: unable to initialize system " "memory cgroup");