From 3ee25d42c7f705dcdeb01a3dc40c3452b18896b0 Mon Sep 17 00:00:00 2001 From: Wilhelm Schuster Date: Sun, 25 Sep 2016 17:33:01 +0200 Subject: [PATCH] containers module: Add option to enable tunnel access This adds the containers..enableTun option allowing containers to access /dev/net/tun. This is required by openvpn, tinc, etc. in order to work properly inside containers. The new option builds on top of two generic options containers..additionalCapabilities and containers..allowedDevices which also can be used for example when adding support for FUSE later down the road. Backported to 16.09. --- nixos/modules/virtualisation/containers.nix | 165 ++++++++++++++------ 1 file changed, 118 insertions(+), 47 deletions(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index f570813805cfa2..1410a14817040e 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -129,6 +129,9 @@ let --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \ --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \ --setenv PATH="$PATH" \ + ${if cfg.additionalCapabilities != null then + ''--capability="${concatStringsSep " " cfg.additionalCapabilities}"'' else "" + } \ ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" ''; @@ -205,6 +208,41 @@ let '' ); + serviceDirectives = cfg: { + ExecReload = pkgs.writeScript "reload-container" + '' + #! ${pkgs.stdenv.shell} -e + ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \ + bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test" + ''; + + SyslogIdentifier = "container %i"; + + EnvironmentFile = "-/etc/containers/%i.conf"; + + Type = "notify"; + + # Note that on reboot, systemd-nspawn returns 133, so this + # unit will be restarted. On poweroff, it returns 0, so the + # unit won't be restarted. + RestartForceExitStatus = "133"; + SuccessExitStatus = "133"; + + Restart = "on-failure"; + + # Hack: we don't want to kill systemd-nspawn, since we call + # "machinectl poweroff" in preStop to shut down the + # container cleanly. But systemd requires sending a signal + # (at least if we want remaining processes to be killed + # after the timeout). So send an ignored signal. + KillMode = "mixed"; + KillSignal = "WINCH"; + + DevicePolicy = "closed"; + DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices; + }; + + system = config.nixpkgs.system; bindMountOpts = { name, config, ... }: { @@ -235,6 +273,27 @@ let }; + allowedDeviceOpts = { name, config, ... }: { + options = { + node = mkOption { + example = "/dev/net/tun"; + type = types.str; + description = "Path to device node"; + }; + modifier = mkOption { + example = "rw"; + type = types.str; + description = '' + Device node access modifier. Takes a combination + r (read), w (write), and + m (mknod). See the + systemd.resource-control(5) man page for more + information.''; + }; + }; + }; + + mkBindFlag = d: let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind="; mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}"; @@ -302,6 +361,8 @@ let dummyConfig = { extraVeths = {}; + additionalCapabilities = []; + allowedDevices = []; hostAddress = null; hostAddress6 = null; localAddress = null; @@ -368,6 +429,26 @@ in ''; }; + additionalCapabilities = mkOption { + type = types.listOf types.str; + default = []; + example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ]; + description = '' + Grant additional capabilities to the container. See the + capabilities(7) and systemd-nspawn(1) man pages for more + information. + ''; + }; + enableTun = mkOption { + type = types.bool; + default = false; + description = '' + Allows the container to create and setup tunnel interfaces + by granting the NET_ADMIN capability and + enabling access to /dev/net/tun. + ''; + }; + privateNetwork = mkOption { type = types.bool; default = false; @@ -422,6 +503,16 @@ in ''; }; + allowedDevices = mkOption { + type = types.listOf types.optionSet; + options = [ allowedDeviceOpts ]; + default = []; + example = [ { node = "/dev/net/tun"; modifier = "rw"; } ]; + description = '' + A list of device nodes to which the containers has access to. + ''; + }; + } // networkOptions; config = mkMerge @@ -488,59 +579,39 @@ in restartIfChanged = false; - serviceConfig = { - ExecReload = pkgs.writeScript "reload-container" - '' - #! ${pkgs.stdenv.shell} -e - ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \ - bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test" - ''; - - SyslogIdentifier = "container %i"; - - EnvironmentFile = "-/etc/containers/%i.conf"; - - Type = "notify"; - - # Note that on reboot, systemd-nspawn returns 133, so this - # unit will be restarted. On poweroff, it returns 0, so the - # unit won't be restarted. - RestartForceExitStatus = "133"; - SuccessExitStatus = "133"; - - Restart = "on-failure"; - - # Hack: we don't want to kill systemd-nspawn, since we call - # "machinectl poweroff" in preStop to shut down the - # container cleanly. But systemd requires sending a signal - # (at least if we want remaining processes to be killed - # after the timeout). So send an ignored signal. - KillMode = "mixed"; - KillSignal = "WINCH"; - - DevicePolicy = "closed"; - }; + serviceConfig = serviceDirectives dummyConfig; }; in { systemd.services = listToAttrs (filter (x: x.value != null) ( # The generic container template used by imperative containers [{ name = "container@"; value = unit; }] # declarative containers - ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" ( - unit // { - preStart = preStartScript cfg; - script = startScript cfg; - postStart = postStartScript cfg; - } // ( - if cfg.autoStart then - { - wantedBy = [ "multi-user.target" ]; - wants = [ "network.target" ]; - after = [ "network.target" ]; - restartTriggers = [ cfg.path ]; - reloadIfChanged = true; - } - else {}) + ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let + config = cfg // ( + if cfg.enableTun then + { + allowedDevices = cfg.allowedDevices + ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ]; + additionalCapabilities = cfg.additionalCapabilities + ++ [ "CAP_NET_ADMIN" ]; + } + else {}); + in + unit // { + preStart = preStartScript config; + script = startScript config; + postStart = postStartScript config; + serviceConfig = serviceDirectives config; + } // ( + if config.autoStart then + { + wantedBy = [ "multi-user.target" ]; + wants = [ "network.target" ]; + after = [ "network.target" ]; + restartTriggers = [ config.path ]; + reloadIfChanged = true; + } + else {}) )) config.containers) ));