Skip to content

Commit

Permalink
nixos/acme: Allow for time window between cert issue and activation
Browse files Browse the repository at this point in the history
  • Loading branch information
Gregor Kleen committed Nov 19, 2017
1 parent 7c24077 commit e70d293
Showing 1 changed file with 53 additions and 12 deletions.
65 changes: 53 additions & 12 deletions nixos/modules/security/acme.nix
Expand Up @@ -57,9 +57,11 @@ let
default = "";
example = "systemctl reload nginx.service";
description = ''
Commands to run after certificates are re-issued. Typically
Commands to run after new certificates go live. Typically
the web server and other servers using certificates need to
be reloaded.
Executed in the same directory with the new certificate.
'';
};

Expand All @@ -77,6 +79,27 @@ let
'';
};

activationDelay = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
Systemd time span expression to delay copying new certificates to main
state directory. See <citerefentry><refentrytitle>systemd.time</refentrytitle>
<manvolnum>7</manvolnum></citerefentry>.
'';
};

preDelay = mkOption {
type = types.lines;
default = "";
description = ''
Commands to run after certificates are re-issued but before they are
activated. Typically the new certificate is published to DNS.
Executed in the same directory with the new certificate.
'';
};

extraDomains = mkOption {
type = types.attrsOf (types.nullOr types.str);
default = {};
Expand Down Expand Up @@ -186,14 +209,14 @@ in
certToServices = cert: data:
let
domain = if data.domain != null then data.domain else cert;
cpath = "${cfg.directory}/${cert}";
cpath = lpath + optionalString (data.activationDelay != null) ".staging";
lpath = "${cfg.directory}/${cert}";
rights = if data.allowKeysForGroup then "750" else "700";
cmdline = [ "-v" "-d" domain "--default_root" data.webroot "--valid_min" cfg.validMin ]
++ optionals (data.email != null) [ "--email" data.email ]
++ concatMap (p: [ "-f" p ]) data.plugins
++ concatLists (mapAttrsToList (name: root: [ "-d" (if root == null then name else "${name}:${root}")]) data.extraDomains)
++ (if cfg.production then []
else ["--server" "https://acme-staging.api.letsencrypt.org/directory"]);
++ optionals (!cfg.production) ["--server" "https://acme-staging.api.letsencrypt.org/directory"];
acmeService = {
description = "Renew ACME Certificate for ${cert}";
after = [ "network.target" "network-online.target" ];
Expand All @@ -206,7 +229,7 @@ in
Group = data.group;
PrivateTmp = true;
};
path = [ pkgs.simp_le ];
path = with pkgs; [ simp_le systemd ];
preStart = ''
mkdir -p '${cfg.directory}'
chown 'root:root' '${cfg.directory}'
Expand All @@ -229,15 +252,36 @@ in
exit "$EXITCODE"
'';
postStop = ''
cd '${cpath}'
if [ -e /tmp/lastExitCode ] && [ "$(cat /tmp/lastExitCode)" = "0" ]; then
echo "Executing postRun hook..."
${data.postRun}
${if data.activationDelay != null then ''
${data.preDelay}
if [ -d '${lpath}' ]; then

This comment has been minimized.

Copy link
@arianvp

arianvp May 12, 2019

Member

Hey @pngwjpgh I have a question about this piece of code. I'm currently refactoring the acme module, and was wondering why we only have an activationDelay when the lpath does exist? Doesn't this mean there is no delay at all the first time you request the cert? I don't want to break expected behaviour, but it's not clear to me what the expected behaviour should be. Why is this conditional check being done here?

This comment has been minimized.

Copy link
@gkleen

gkleen May 12, 2019

Contributor

Expected behaviour (to me) is exactly as you describe; i.e. no delay on first activation.

I made that decision so there would always be some certificate in place (expected use case for the delayed activation is to give a time window for publishing the new certificate in e.g. DNS, no such delay is needed, though, if this is the first certificate ever requested since there, presumably, isn't an earlier certificate currently published via DNS, which would require a graceful update).

This comment has been minimized.

Copy link
@arianvp

arianvp May 12, 2019

Member

Because we're deprecating the PermissionStartOnly option in systemd, we can't call systemctl anymore in postRun script as we don't have sufficient permissions.

Given systemd can order dependencies itself (no need to call systemctl in a postRun manually), I was thinking to do the following:

# Either immediatelly set live, or have a delay of "data.activationDelay" seconds
acme-${cert}.serviceConfig.{Requires,Before} =  if (data.activationDelay != null) "acme-setlive-${cert}.timer" else "acme-setlive-${cert}.service"

...
timers.acme-setlive-${cert}.timerConfig.OnActive = data.activationDelay;

This would fix the problem of the service not having enough permissions to call systemd-run/systemctl but the behaviour changes slightly: there is always a delay, also when the initial cert is requested. Do you think that would be a fair tradeoff? Please let me know, since you're actually using this feature and I don't want to break somebody else's workflow for no good reason @pngwjpgh

I'm not sure how to do the conditional delay based on the existence of a directory with just systemd to get the
original behaviour.
Perhaps I could hack something with ConditionPathExist and another intermediate unit, will have to look at it. It's
probably possible, just needs a bit more thought

This comment has been minimized.

Copy link
@gkleen

gkleen May 12, 2019

Contributor

Having a delay on first activation is fine for my workflow and probably more intuitive, anyway; having some certificate at all times is what preliminarySelfSigned is for :-)

Starting the time via service dependencies is fine by me and has other advantages, like surviving reboots.

This comment has been minimized.

Copy link
@arianvp

arianvp Jun 30, 2019

Member

Would you be okay with me deprecating activationDelay altogether? It seems rather arbitrary. e.g. what happens when the delay expired, but the cert still wasn't published to DNS?

I think a good replacement instead would be to create a acme-wait-dns-published.service yourself and then do:

# acme-wait-dns-published.service
RequiredBy=acme-${cert}.service
After=acme-${cert}.service

This way, the delay isn't an arbitrary number, but mandated by the script that you use to publish to DANE/TLSA.
and it greatly simplifies the letsencrypt activation logic, which is now hard to follow.

systemd-run --no-block --on-active='${data.activationDelay}' --unit acme-setlive-${cert}.service
else
systemctl --wait start acme-setlive-${cert}.service
fi
'' else data.postRun}
fi
'';

before = [ "acme-certificates.target" ];
wantedBy = [ "acme-certificates.target" ];
};
delayService = {
description = "Set certificate for ${cert} live";
path = with pkgs; [ rsync ];
serviceConfig = {
Type = "oneshot";
};
script = ''
rsync -a --delete-after '${cpath}/' '${lpath}'
'';
postStop = data.postRun;
};
selfsignedService = {
description = "Create preliminary self-signed certificate for ${cert}";
preStart = ''
Expand Down Expand Up @@ -297,11 +341,8 @@ in
};
in (
[ { name = "acme-${cert}"; value = acmeService; } ]
++
(if cfg.preliminarySelfsigned
then [ { name = "acme-selfsigned-${cert}"; value = selfsignedService; } ]
else []
)
++ optional cfg.preliminarySelfsigned { name = "acme-selfsigned-${cert}"; value = selfsignedService; }
++ optional (data.activationDelay != null) { name = "acme-setlive-${cert}"; value = delayService; }
);
servicesAttr = listToAttrs services;
injectServiceDep = {
Expand Down

0 comments on commit e70d293

Please sign in to comment.