Skip to content
This repository was archived by the owner on Aug 6, 2020. It is now read-only.

Commit a931ad4

Browse files
committed
core: introduce new Delegate=yes/no property controlling creation of cgroup subhierarchies
For privileged units this resource control property ensures that the processes have all controllers systemd manages enabled. For unprivileged services (those with User= set) this ensures that access rights to the service cgroup are granted to the user in question, so that they can create further subgroups. Note that this only applies to the name=systemd hierarchy though, as access to other controllers is not safe for unprivileged processes. Delegate=yes should be set for container scopes where a systemd instance inside the container shall manage the hierarchies below its own cgroup and have access to all controllers. Delegate=yes should also be set for user@.service, so that systemd --user can run, controlling its own cgroup tree. This commit changes machined, systemd-nspawn@.service and user@.service to set this boolean, in order to ensure that container management will just work, and the user systemd instance can run fine.
1 parent c962cb6 commit a931ad4

15 files changed

+112
-8
lines changed

man/systemd.resource-control.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,20 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>.
394394
</listitem>
395395
</varlistentry>
396396

397+
<varlistentry>
398+
<term><varname>Delegate=</varname></term>
399+
400+
<listitem>
401+
<para>Turns on delegation of further resource control
402+
partitioning to processes of the unit. For unprivileged
403+
services (i.e. those using the <varname>User=</varname>
404+
setting) this allows processes to create a subhierarchy
405+
beneath its control group path. For privileged services and
406+
scopes this ensures the processes will have all control
407+
group controllers enabled.</para>
408+
</listitem>
409+
</varlistentry>
410+
397411
</variablelist>
398412
</refsect1>
399413

src/core/cgroup.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
105105
"%sBlockIOWeight=%lu\n"
106106
"%sStartupBlockIOWeight=%lu\n"
107107
"%sMemoryLimit=%" PRIu64 "\n"
108-
"%sDevicePolicy=%s\n",
108+
"%sDevicePolicy=%s\n"
109+
"%sDelegate=%s\n",
109110
prefix, yes_no(c->cpu_accounting),
110111
prefix, yes_no(c->blockio_accounting),
111112
prefix, yes_no(c->memory_accounting),
@@ -115,7 +116,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
115116
prefix, c->blockio_weight,
116117
prefix, c->startup_blockio_weight,
117118
prefix, c->memory_limit,
118-
prefix, cgroup_device_policy_to_string(c->device_policy));
119+
prefix, cgroup_device_policy_to_string(c->device_policy),
120+
prefix, yes_no(c->delegate));
119121

120122
LIST_FOREACH(device_allow, a, c->device_allow)
121123
fprintf(f,
@@ -461,7 +463,8 @@ CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
461463
c->memory_limit != (uint64_t) -1)
462464
mask |= CGROUP_MEMORY;
463465

464-
if (c->device_allow || c->device_policy != CGROUP_AUTO)
466+
if (c->device_allow ||
467+
c->device_policy != CGROUP_AUTO)
465468
mask |= CGROUP_DEVICE;
466469

467470
return mask;
@@ -474,6 +477,19 @@ CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
474477
if (!c)
475478
return 0;
476479

480+
/* If delegation is turned on, then turn on all cgroups,
481+
* unless the process we fork into it is known to drop
482+
* privileges anyway, and shouldn't get access to the
483+
* controllers anyway. */
484+
485+
if (c->delegate) {
486+
ExecContext *e;
487+
488+
e = unit_get_exec_context(u);
489+
if (!e || exec_context_maintains_privileges(e))
490+
return _CGROUP_CONTROLLER_MASK_ALL;
491+
}
492+
477493
return cgroup_context_get_mask(c);
478494
}
479495

src/core/cgroup.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ struct CGroupContext {
8383

8484
CGroupDevicePolicy device_policy;
8585
LIST_HEAD(CGroupDeviceAllow, device_allow);
86+
87+
bool delegate;
8688
};
8789

8890
#include "unit.h"

src/core/dbus-cgroup.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ static int property_get_ulong_as_u64(
153153

154154
const sd_bus_vtable bus_cgroup_vtable[] = {
155155
SD_BUS_VTABLE_START(0),
156+
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
156157
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
157158
SD_BUS_PROPERTY("CPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, cpu_shares), 0),
158159
SD_BUS_PROPERTY("StartupCPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, startup_cpu_shares), 0),
@@ -170,6 +171,39 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
170171
SD_BUS_VTABLE_END
171172
};
172173

174+
static int bus_cgroup_set_transient_property(
175+
Unit *u,
176+
CGroupContext *c,
177+
const char *name,
178+
sd_bus_message *message,
179+
UnitSetPropertiesMode mode,
180+
sd_bus_error *error) {
181+
182+
int r;
183+
184+
assert(u);
185+
assert(c);
186+
assert(name);
187+
assert(message);
188+
189+
if (streq(name, "Delegate")) {
190+
int b;
191+
192+
r = sd_bus_message_read(message, "b", &b);
193+
if (r < 0)
194+
return r;
195+
196+
if (mode != UNIT_CHECK) {
197+
c->delegate = b;
198+
unit_write_drop_in_private(u, mode, name, b ? "Delegate=yes" : "Delegate=no");
199+
}
200+
201+
return 1;
202+
}
203+
204+
return 0;
205+
}
206+
173207
int bus_cgroup_set_property(
174208
Unit *u,
175209
CGroupContext *c,
@@ -632,6 +666,14 @@ int bus_cgroup_set_property(
632666
}
633667

634668
return 1;
669+
670+
}
671+
672+
if (u->transient && u->load_state == UNIT_STUB) {
673+
r = bus_cgroup_set_transient_property(u, c, name, message, mode, error);
674+
if (r != 0)
675+
return r;
676+
635677
}
636678

637679
return 0;

src/core/execute.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,8 +1444,10 @@ static int exec_child(ExecCommand *command,
14441444
}
14451445
#endif
14461446

1447-
#ifdef HAVE_PAM
1448-
if (params->cgroup_path && context->user && context->pam_name) {
1447+
/* If delegation is enabled we'll pass ownership of the cgroup
1448+
* (but only in systemd's own controller hierarchy!) to the
1449+
* user of the new process. */
1450+
if (params->cgroup_path && context->user && params->cgroup_delegate) {
14491451
err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
14501452
if (err < 0) {
14511453
*error = EXIT_CGROUP;
@@ -1459,7 +1461,6 @@ static int exec_child(ExecCommand *command,
14591461
return err;
14601462
}
14611463
}
1462-
#endif
14631464

14641465
if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
14651466
char **rt;
@@ -2402,6 +2403,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
24022403
prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
24032404
}
24042405

2406+
bool exec_context_maintains_privileges(ExecContext *c) {
        assert(c);

        /* Returns true if the process forked off would run under an
         * unchanged UID or as root: that is, when no user is set in
         * the context, or when the user set is "root" or "0". */

        return !c->user ||
                streq(c->user, "root") ||
                streq(c->user, "0");
}
2420+
24052421
void exec_status_start(ExecStatus *s, pid_t pid) {
24062422
assert(s);
24072423

src/core/execute.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ struct ExecParameters {
207207
bool selinux_context_net;
208208
CGroupControllerMask cgroup_supported;
209209
const char *cgroup_path;
210+
bool cgroup_delegate;
210211
const char *runtime_prefix;
211212
const char *unit_id;
212213
usec_t watchdog_usec;
@@ -244,6 +245,7 @@ int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_r
244245
int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l);
245246

246247
bool exec_context_may_touch_console(ExecContext *c);
248+
bool exec_context_maintains_privileges(ExecContext *c);
247249

248250
void exec_status_start(ExecStatus *s, pid_t pid);
249251
void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status);

src/core/load-fragment-gperf.gperf.m4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ $1.BlockIOWeight, config_parse_blockio_weight, 0,
119119
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
120120
$1.BlockIODeviceWeight, config_parse_blockio_device_weight, 0, offsetof($1, cgroup_context)
121121
$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
122-
$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)'
122+
$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
123+
$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)'
123124
)m4_dnl
124125
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
125126
Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)

src/core/mount.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
715715
exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn;
716716
exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported;
717717
exec_params.cgroup_path = UNIT(m)->cgroup_path;
718+
exec_params.cgroup_delegate = m->cgroup_context.delegate;
718719
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(m)->manager);
719720
exec_params.unit_id = UNIT(m)->id;
720721

src/core/service.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,7 @@ static int service_spawn(
10001000
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
10011001
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
10021002
exec_params.cgroup_path = path;
1003+
exec_params.cgroup_delegate = s->cgroup_context.delegate;
10031004
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
10041005
exec_params.unit_id = UNIT(s)->id;
10051006
exec_params.watchdog_usec = s->watchdog_usec;

src/core/socket.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1414,6 +1414,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
14141414
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
14151415
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
14161416
exec_params.cgroup_path = UNIT(s)->cgroup_path;
1417+
exec_params.cgroup_delegate = s->cgroup_context.delegate;
14171418
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
14181419
exec_params.unit_id = UNIT(s)->id;
14191420

0 commit comments

Comments
 (0)