diff --git a/nixos/doc/manual/release-notes/rl-1903.xml b/nixos/doc/manual/release-notes/rl-1903.xml index bbd3cf2e9db5..7c94f6e9473e 100644 --- a/nixos/doc/manual/release-notes/rl-1903.xml +++ b/nixos/doc/manual/release-notes/rl-1903.xml @@ -68,6 +68,17 @@ for details. + + + There is now a set of options for + , which allows to restrict services + into a + chroot + 2 + ed environment that only contains the store paths from + the runtime closure of the service. + + diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index fad7f336c995..374e39f553fa 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -172,6 +172,7 @@ ./security/rtkit.nix ./security/wrappers/default.nix ./security/sudo.nix + ./security/systemd-confinement.nix ./services/admin/oxidized.nix ./services/admin/salt/master.nix ./services/admin/salt/minion.nix diff --git a/nixos/modules/security/systemd-confinement.nix b/nixos/modules/security/systemd-confinement.nix new file mode 100644 index 000000000000..cd4eb81dbe19 --- /dev/null +++ b/nixos/modules/security/systemd-confinement.nix @@ -0,0 +1,199 @@ +{ config, pkgs, lib, ... }: + +let + toplevelConfig = config; + inherit (lib) types; + inherit (import ../system/boot/systemd-lib.nix { + inherit config pkgs lib; + }) mkPathSafeName; +in { + options.systemd.services = lib.mkOption { + type = types.attrsOf (types.submodule ({ name, config, ... }: { + options.confinement.enable = lib.mkOption { + type = types.bool; + default = false; + description = '' + If set, all the required runtime store paths for this service are + bind-mounted into a tmpfs-based + chroot + 2 + . + ''; + }; + + options.confinement.fullUnit = lib.mkOption { + type = types.bool; + default = false; + description = '' + Whether to include the full closure of the systemd unit file into the + chroot, instead of just the dependencies for the executables. + + While it may be tempting to just enable this option to + make things work quickly, please be aware that this might add paths + to the closure of the chroot that you didn't anticipate. It's better + to use to explicitly add additional store paths to the + chroot. + ''; + }; + + options.confinement.packages = lib.mkOption { + type = types.listOf (types.either types.str types.package); + default = []; + description = let + mkScOption = optName: ""; + in '' + Additional packages or strings with context to add to the closure of + the chroot. By default, this includes all the packages from the + ${lib.concatMapStringsSep ", " mkScOption [ + "ExecReload" "ExecStartPost" "ExecStartPre" "ExecStop" + "ExecStopPost" + ]} and ${mkScOption "ExecStart"} options. If you want to have all the + dependencies of this systemd unit, you can use + . + + The store paths listed in are + not included in the closure as + well as paths from other options except those listed + above. + ''; + }; + + options.confinement.binSh = lib.mkOption { + type = types.nullOr types.path; + default = toplevelConfig.environment.binsh; + defaultText = "config.environment.binsh"; + example = lib.literalExample "\${pkgs.dash}/bin/dash"; + description = '' + The program to make available as /bin/sh inside + the chroot. If this is set to null, no + /bin/sh is provided at all. + + This is useful for some applications, which for example use the + + system + 3 + library function to execute commands. + ''; + }; + + options.confinement.mode = lib.mkOption { + type = types.enum [ "full-apivfs" "chroot-only" ]; + default = "full-apivfs"; + description = '' + The value full-apivfs (the default) sets up + private /dev, /proc, /sys and /tmp file systems in a separate user + name space. + + If this is set to chroot-only, only the file + system name space is set up along with the call to + chroot + 2 + . + + This doesn't cover network namespaces and is solely for + file system level isolation. + ''; + }; + + config = let + rootName = "${mkPathSafeName name}-chroot"; + inherit (config.confinement) binSh fullUnit; + wantsAPIVFS = lib.mkDefault (config.confinement.mode == "full-apivfs"); + in lib.mkIf config.confinement.enable { + serviceConfig = { + RootDirectory = pkgs.runCommand rootName {} "mkdir \"$out\""; + TemporaryFileSystem = "/"; + PrivateMounts = lib.mkDefault true; + + # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt + # to change some of these to default to true. + # + # If we run in chroot-only mode, having something like PrivateDevices + # set to true by default will mount /dev within the chroot, whereas + # with "chroot-only" it's expected that there are no /dev, /proc and + # /sys file systems available. + # + # However, if this suddenly becomes true, the attack surface will + # increase, so let's explicitly set these options to true/false + # depending on the mode. + MountAPIVFS = wantsAPIVFS; + PrivateDevices = wantsAPIVFS; + PrivateTmp = wantsAPIVFS; + PrivateUsers = wantsAPIVFS; + ProtectControlGroups = wantsAPIVFS; + ProtectKernelModules = wantsAPIVFS; + ProtectKernelTunables = wantsAPIVFS; + }; + confinement.packages = let + execOpts = [ + "ExecReload" "ExecStart" "ExecStartPost" "ExecStartPre" "ExecStop" + "ExecStopPost" + ]; + execPkgs = lib.concatMap (opt: let + isSet = config.serviceConfig ? ${opt}; + in lib.optional isSet config.serviceConfig.${opt}) execOpts; + unitAttrs = toplevelConfig.systemd.units."${name}.service"; + allPkgs = lib.singleton (builtins.toJSON unitAttrs); + unitPkgs = if fullUnit then allPkgs else execPkgs; + in unitPkgs ++ lib.optional (binSh != null) binSh; + }; + })); + }; + + config.assertions = lib.concatLists (lib.mapAttrsToList (name: cfg: let + whatOpt = optName: "The 'serviceConfig' option '${optName}' for" + + " service '${name}' is enabled in conjunction with" + + " 'confinement.enable'"; + in lib.optionals cfg.confinement.enable [ + { assertion = !cfg.serviceConfig.RootDirectoryStartOnly or false; + message = "${whatOpt "RootDirectoryStartOnly"}, but right now systemd" + + " doesn't support restricting bind-mounts to 'ExecStart'." + + " Please either define a separate service or find a way to run" + + " commands other than ExecStart within the chroot."; + } + { assertion = !cfg.serviceConfig.DynamicUser or false; + message = "${whatOpt "DynamicUser"}. Please create a dedicated user via" + + " the 'users.users' option instead as this combination is" + + " currently not supported."; + } + ]) config.systemd.services); + + config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let + rootPaths = let + contents = lib.concatStringsSep "\n" cfg.confinement.packages; + in pkgs.writeText "${mkPathSafeName name}-string-contexts.txt" contents; + + chrootPaths = pkgs.runCommand "${mkPathSafeName name}-chroot-paths" { + closureInfo = pkgs.closureInfo { inherit rootPaths; }; + serviceName = "${name}.service"; + excludedPath = rootPaths; + } '' + mkdir -p "$out/lib/systemd/system" + serviceFile="$out/lib/systemd/system/$serviceName" + + echo '[Service]' > "$serviceFile" + + # /bin/sh is special here, because the option value could contain a + # symlink and we need to properly resolve it. + ${lib.optionalString (cfg.confinement.binSh != null) '' + binsh=${lib.escapeShellArg cfg.confinement.binSh} + realprog="$(readlink -e "$binsh")" + echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile" + ''} + + while read storePath; do + if [ -L "$storePath" ]; then + # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths, + # so let's just bind-mount the target to that location. + echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath" + elif [ "$storePath" != "$excludedPath" ]; then + echo "BindReadOnlyPaths=$storePath" + fi + done < "$closureInfo/store-paths" >> "$serviceFile" + ''; + in lib.optional cfg.confinement.enable chrootPaths) config.systemd.services); +} diff --git a/nixos/modules/system/boot/systemd-lib.nix b/nixos/modules/system/boot/systemd-lib.nix index 68a40377ee13..28ad4f121bbe 100644 --- a/nixos/modules/system/boot/systemd-lib.nix +++ b/nixos/modules/system/boot/systemd-lib.nix @@ -9,12 +9,11 @@ in rec { shellEscape = s: (replaceChars [ "\\" ] [ "\\\\" ] s); + mkPathSafeName = lib.replaceChars ["@" ":" "\\" "[" "]"] ["-" "-" "-" "" ""]; + makeUnit = name: unit: - let - pathSafeName = lib.replaceChars ["@" ":" "\\" "[" "]"] ["-" "-" "-" "" ""] name; - in if unit.enable then - pkgs.runCommand "unit-${pathSafeName}" + pkgs.runCommand "unit-${mkPathSafeName name}" { preferLocalBuild = true; allowSubstitutes = false; inherit (unit) text; @@ -24,7 +23,7 @@ in rec { echo -n "$text" > $out/${shellEscape name} '' else - pkgs.runCommand "unit-${pathSafeName}-disabled" + pkgs.runCommand "unit-${mkPathSafeName name}-disabled" { preferLocalBuild = true; allowSubstitutes = false; } diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index 49bbe24fdc0e..395dc22f9682 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -221,6 +221,7 @@ in switchTest = handleTest ./switch-test.nix {}; syncthing-relay = handleTest ./syncthing-relay.nix {}; systemd = handleTest ./systemd.nix {}; + systemd-confinement = handleTest ./systemd-confinement.nix {}; taskserver = handleTest ./taskserver.nix {}; telegraf = handleTest ./telegraf.nix {}; tomcat = handleTest ./tomcat.nix {}; diff --git a/nixos/tests/systemd-confinement.nix b/nixos/tests/systemd-confinement.nix new file mode 100644 index 000000000000..b7b10fb36aac --- /dev/null +++ b/nixos/tests/systemd-confinement.nix @@ -0,0 +1,168 @@ +import ./make-test.nix { + name = "systemd-confinement"; + + machine = { pkgs, lib, ... }: let + testServer = pkgs.writeScript "testserver.sh" '' + #!${pkgs.stdenv.shell} + export PATH=${lib.escapeShellArg "${pkgs.coreutils}/bin"} + ${lib.escapeShellArg pkgs.stdenv.shell} 2>&1 + echo "exit-status:$?" + ''; + + testClient = pkgs.writeScriptBin "chroot-exec" '' + #!${pkgs.stdenv.shell} -e + output="$(echo "$@" | nc -NU "/run/test$(< /teststep).sock")" + ret="$(echo "$output" | sed -nre '$s/^exit-status:([0-9]+)$/\1/p')" + echo "$output" | head -n -1 + exit "''${ret:-1}" + ''; + + mkTestStep = num: { description, config ? {}, testScript }: { + systemd.sockets."test${toString num}" = { + description = "Socket for Test Service ${toString num}"; + wantedBy = [ "sockets.target" ]; + socketConfig.ListenStream = "/run/test${toString num}.sock"; + socketConfig.Accept = true; + }; + + systemd.services."test${toString num}@" = { + description = "Confined Test Service ${toString num}"; + confinement = (config.confinement or {}) // { enable = true; }; + serviceConfig = (config.serviceConfig or {}) // { + ExecStart = testServer; + StandardInput = "socket"; + }; + } // removeAttrs config [ "confinement" "serviceConfig" ]; + + __testSteps = lib.mkOrder num '' + subtest '${lib.escape ["\\" "'"] description}', sub { + $machine->succeed('echo ${toString num} > /teststep'); + ${testScript} + }; + ''; + }; + + in { + imports = lib.imap1 mkTestStep [ + { description = "chroot-only confinement"; + config.confinement.mode = "chroot-only"; + testScript = '' + $machine->succeed( + 'test "$(chroot-exec ls -1 / | paste -sd,)" = bin,nix', + 'test "$(chroot-exec id -u)" = 0', + 'chroot-exec chown 65534 /bin', + ); + ''; + } + { description = "full confinement with APIVFS"; + testScript = '' + $machine->fail( + 'chroot-exec ls -l /etc', + 'chroot-exec ls -l /run', + 'chroot-exec chown 65534 /bin', + ); + $machine->succeed( + 'test "$(chroot-exec id -u)" = 0', + 'chroot-exec chown 0 /bin', + ); + ''; + } + { description = "check existence of bind-mounted /etc"; + config.serviceConfig.BindReadOnlyPaths = [ "/etc" ]; + testScript = '' + $machine->succeed('test -n "$(chroot-exec cat /etc/passwd)"'); + ''; + } + { description = "check if User/Group really runs as non-root"; + config.serviceConfig.User = "chroot-testuser"; + config.serviceConfig.Group = "chroot-testgroup"; + testScript = '' + $machine->succeed('chroot-exec ls -l /dev'); + $machine->succeed('test "$(chroot-exec id -u)" != 0'); + $machine->fail('chroot-exec touch /bin/test'); + ''; + } + (let + symlink = pkgs.runCommand "symlink" { + target = pkgs.writeText "symlink-target" "got me\n"; + } "ln -s \"$target\" \"$out\""; + in { + description = "check if symlinks are properly bind-mounted"; + config.confinement.packages = lib.singleton symlink; + testScript = '' + $machine->fail('chroot-exec test -e /etc'); + $machine->succeed('chroot-exec cat ${symlink} >&2'); + $machine->succeed('test "$(chroot-exec cat ${symlink})" = "got me"'); + ''; + }) + { description = "check if StateDirectory works"; + config.serviceConfig.User = "chroot-testuser"; + config.serviceConfig.Group = "chroot-testgroup"; + config.serviceConfig.StateDirectory = "testme"; + testScript = '' + $machine->succeed('chroot-exec touch /tmp/canary'); + $machine->succeed('chroot-exec "echo works > /var/lib/testme/foo"'); + $machine->succeed('test "$(< /var/lib/testme/foo)" = works'); + $machine->succeed('test ! -e /tmp/canary'); + ''; + } + { description = "check if /bin/sh works"; + testScript = '' + $machine->succeed( + 'chroot-exec test -e /bin/sh', + 'test "$(chroot-exec \'/bin/sh -c "echo bar"\')" = bar', + ); + ''; + } + { description = "check if suppressing /bin/sh works"; + config.confinement.binSh = null; + testScript = '' + $machine->succeed( + 'chroot-exec test ! -e /bin/sh', + 'test "$(chroot-exec \'/bin/sh -c "echo foo"\')" != foo', + ); + ''; + } + { description = "check if we can set /bin/sh to something different"; + config.confinement.binSh = "${pkgs.hello}/bin/hello"; + testScript = '' + $machine->succeed( + 'chroot-exec test -e /bin/sh', + 'test "$(chroot-exec /bin/sh -g foo)" = foo', + ); + ''; + } + { description = "check if only Exec* dependencies are included"; + config.environment.FOOBAR = pkgs.writeText "foobar" "eek\n"; + testScript = '' + $machine->succeed('test "$(chroot-exec \'cat "$FOOBAR"\')" != eek'); + ''; + } + { description = "check if all unit dependencies are included"; + config.environment.FOOBAR = pkgs.writeText "foobar" "eek\n"; + config.confinement.fullUnit = true; + testScript = '' + $machine->succeed('test "$(chroot-exec \'cat "$FOOBAR"\')" = eek'); + ''; + } + ]; + + options.__testSteps = lib.mkOption { + type = lib.types.lines; + description = "All of the test steps combined as a single script."; + }; + + config.environment.systemPackages = lib.singleton testClient; + + config.users.groups.chroot-testgroup = {}; + config.users.users.chroot-testuser = { + description = "Chroot Test User"; + group = "chroot-testgroup"; + }; + }; + + testScript = { nodes, ... }: '' + $machine->waitForUnit('multi-user.target'); + ${nodes.machine.config.__testSteps} + ''; +}