From 632c8559b95be9b2e62e3386bfd34dd9377b686e Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Mon, 22 May 2023 16:13:28 +0000 Subject: [PATCH] workbench: allow custom supervisord.conf and only the needed Nomad templates --- nix/workbench/backend/nomad-job.nix | 122 ++++++++++++---------- nix/workbench/backend/nomad.nix | 19 +--- nix/workbench/backend/supervisor-conf.nix | 41 ++++---- nix/workbench/backend/supervisor.nix | 6 +- 4 files changed, 94 insertions(+), 94 deletions(-) diff --git a/nix/workbench/backend/nomad-job.nix b/nix/workbench/backend/nomad-job.nix index 6545cc0f0ed..d8bb652044a 100644 --- a/nix/workbench/backend/nomad-job.nix +++ b/nix/workbench/backend/nomad-job.nix @@ -3,12 +3,11 @@ # clusters and SRE infrastructure used for long-running cloud benchmarks. Why? # To make it easier to improve and debug the almighty workbench! ################################################################################ -{ lib +{ pkgs +, lib , stateDir , profileData , containerSpecs -# Needs unix_http_server.file -, supervisorConf , execTaskDriver , oneTracerPerNode ? false }: @@ -58,7 +57,8 @@ let # the container I get (from journald): # Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: Error: Cannot open an HTTP server: socket.error reported -2 # Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: For help, use /nix/store/izqhlj5i1x9ldyn43d02kcy4mafmj3ci-python3.9-supervisor-4.2.4/bin/supervisord -h - task_supervisord_url = "unix://${supervisorConf.value.unix_http_server.file}"; + unixHttpServerPort = "/tmp/supervisor.sock"; + task_supervisord_url = "unix://${unixHttpServerPort}"; # Location of the supervisord config file inside the container. # This file can be mounted as a volume or created as a template. 
task_supervisord_conf = "${task_statedir}/supervisor/supervisord.conf"; @@ -452,8 +452,21 @@ let { env = false; destination = "${task_supervisord_conf}"; - data = escapeTemplate (__readFile - supervisorConf.INI); + data = escapeTemplate (__readFile ( + let supervisorConf = import ./supervisor-conf.nix + { inherit pkgs lib stateDir; + # Include only this task's node + nodeSpecs = if taskName == "tracer" + then {} + else {"${nodeSpec.name}"=nodeSpec;} + ; + # Only for the tracer task or also nodes if oneTracerPerNode + withTracer = oneTracerPerNode || taskName == "tracer"; + # ''{{ env "NOMAD_TASK_DIR" }}/supervisor.sock'' + inherit unixHttpServerPort; + }; + in supervisorConf.INI + )); change_mode = "noop"; error_on_missing_key = true; } @@ -521,54 +534,51 @@ let } ]) ++ - # Node(s) - (lib.lists.flatten (lib.mapAttrsToList - (_: nodeSpec: [ - ## Node start.sh script. - { - env = false; - destination = "${task_statedir}/${nodeSpec.name}/start.sh"; - data = escapeTemplate ( - let scriptValue = profileData.node-services."${nodeSpec.name}".startupScript.value; - in if execTaskDriver - then (startScriptToGoTemplate - nodeSpec.name - ("perf-" + nodeSpec.name) - ("node" + (toString nodeSpec.i)) - nodeSpec - scriptValue - ) - else scriptValue - ); - change_mode = "noop"; - error_on_missing_key = true; - perms = "744"; # Only for every "start.sh" script. Default: "644" - } - ## Node configuration file. - { - env = false; - destination = "${task_statedir}/${nodeSpec.name}/config.json"; - data = escapeTemplate (lib.generators.toJSON {} - profileData.node-services."${nodeSpec.name}".nodeConfig.value); - change_mode = "noop"; - error_on_missing_key = true; - } - ## Node topology file. 
- { - env = false; - destination = "${task_statedir}/${nodeSpec.name}/topology.json"; - data = escapeTemplate ( - let topology = profileData.node-services."${nodeSpec.name}".topology; - in if execTaskDriver - then (topologyToGoTemplate topology.value) - else (__readFile topology.JSON ) - ); - change_mode = "noop"; - error_on_missing_key = true; - } - ]) - profileData.node-specs.value - )) + # Node + (lib.optionals (taskName != "tracer") [ + ## Node start.sh script. + { + env = false; + destination = "${task_statedir}/${nodeSpec.name}/start.sh"; + data = escapeTemplate ( + let scriptValue = profileData.node-services."${nodeSpec.name}".startupScript.value; + in if execTaskDriver + then (startScriptToGoTemplate + taskName # taskName + serviceName # serviceName + portName # portName (can't have "-") + nodeSpec # nodeSpec + scriptValue # startScript + ) + else scriptValue + ); + change_mode = "noop"; + error_on_missing_key = true; + perms = "744"; # Only for every "start.sh" script. Default: "644" + } + ## Node configuration file. + { + env = false; + destination = "${task_statedir}/${nodeSpec.name}/config.json"; + data = escapeTemplate (lib.generators.toJSON {} + profileData.node-services."${nodeSpec.name}".nodeConfig.value); + change_mode = "noop"; + error_on_missing_key = true; + } + ## Node topology file. + { + env = false; + destination = "${task_statedir}/${nodeSpec.name}/topology.json"; + data = escapeTemplate ( + let topology = profileData.node-services."${nodeSpec.name}".topology; + in if execTaskDriver + then (topologyToGoTemplate topology.value) + else (__readFile topology.JSON ) + ); + change_mode = "noop"; + error_on_missing_key = true; + } + ]) ; # Specifies logging configuration for the stdout and stderr of the @@ -687,7 +697,7 @@ let "tracer" # portName (can't have "-") 0 # portNum # TODO: Which region? 
- {region=null;}; # node-specs + {region=null;}; # node-spec } ] ++ @@ -706,7 +716,7 @@ let ("perf-node-" + (toString nodeSpec.i)) # serviceName ("node" + (toString nodeSpec.i)) # portName (can't have "-") nodeSpec.port # portNum - nodeSpec; # node-specs + nodeSpec; # node-spec }) (profileData.node-specs.value) ) diff --git a/nix/workbench/backend/nomad.nix b/nix/workbench/backend/nomad.nix index eb9e7c9c4c4..d2bf9b7b775 100644 --- a/nix/workbench/backend/nomad.nix +++ b/nix/workbench/backend/nomad.nix @@ -15,13 +15,6 @@ let materialise-profile = { profileData }: let - supervisorConf = import ./supervisor-conf.nix - { inherit profileData; - inherit pkgs lib stateDir; - # ''{{ env "NOMAD_TASK_DIR" }}/supervisor.sock'' - unixHttpServerPort = "/tmp/supervisor.sock"; - } - ; # Intermediate / workbench-adhoc container specifications containerSpecs = rec { # @@ -120,19 +113,17 @@ let podman = { # TODO: oneTracerPerGroup oneTracerPerCluster = import ./nomad-job.nix - { inherit lib stateDir; + { inherit pkgs lib stateDir; inherit profileData; inherit containerSpecs; - inherit supervisorConf; # May evolve to a "cloud" flag! execTaskDriver = false; oneTracerPerNode = false; }; oneTracerPerNode = import ./nomad-job.nix - { inherit lib stateDir; + { inherit pkgs lib stateDir; inherit profileData; inherit containerSpecs; - inherit supervisorConf; # May evolve to a "cloud" flag! execTaskDriver = false; oneTracerPerNode = true; @@ -141,19 +132,17 @@ let exec = { # TODO: oneTracerPerGroup oneTracerPerCluster = import ./nomad-job.nix - { inherit lib stateDir; + { inherit pkgs lib stateDir; inherit profileData; inherit containerSpecs; - inherit supervisorConf; # May evolve to a "cloud" flag! execTaskDriver = true; oneTracerPerNode = false; }; oneTracerPerNode = import ./nomad-job.nix - { inherit lib stateDir; + { inherit pkgs lib stateDir; inherit profileData; inherit containerSpecs; - inherit supervisorConf; # May evolve to a "cloud" flag! 
execTaskDriver = true; oneTracerPerNode = true; diff --git a/nix/workbench/backend/supervisor-conf.nix b/nix/workbench/backend/supervisor-conf.nix index 50c60179e88..c60be7b829b 100644 --- a/nix/workbench/backend/supervisor-conf.nix +++ b/nix/workbench/backend/supervisor-conf.nix @@ -1,7 +1,8 @@ { pkgs , lib , stateDir -, profileData +, nodeSpecs +, withTracer , unixHttpServerPort ? null , inetHttpServerPort ? null }: @@ -63,7 +64,7 @@ let }; } // - lib.attrsets.optionalAttrs (profileData.value.node.tracer) + lib.attrsets.optionalAttrs withTracer { "program:tracer" = { # "command" below assumes "directory" is set accordingly. @@ -82,25 +83,23 @@ let }; } // - listToAttrs - (flip mapAttrsToList profileData.node-services - (_: { nodeSpec, service, ... }: - nameValuePair "program:${nodeSpec.value.name}" { - # "command" below assumes "directory" is set accordingly. - directory = "${stateDir}/${nodeSpec.value.name}"; - command = "${command}"; - stdout_logfile = "${stateDir}/${nodeSpec.value.name}/stdout"; - stderr_logfile = "${stateDir}/${nodeSpec.value.name}/stderr"; - stopasgroup = false; - killasgroup = false; - autostart = false; - autorestart = false; - # Don't attempt any restart! - startretries = 0; - # Seconds it needs to stay running to consider the start successful - startsecs = 5; - }) - ) + (builtins.listToAttrs (lib.mapAttrsToList (nodeName: nodeSpec: + lib.attrsets.nameValuePair "program:${nodeName}" { + # "command" below assumes "directory" is set accordingly. + directory = "${stateDir}/${nodeName}"; + command = "${command}"; + stdout_logfile = "${stateDir}/${nodeName}/stdout"; + stderr_logfile = "${stateDir}/${nodeName}/stderr"; + stopasgroup = false; + killasgroup = false; + autostart = false; + autorestart = false; + # Don't attempt any restart! 
+ startretries = 0; + # Seconds it needs to stay running to consider the start successful + startsecs = 5; + }) + nodeSpecs)) ## ## [unix_http_server] Section Settings ## diff --git a/nix/workbench/backend/supervisor.nix b/nix/workbench/backend/supervisor.nix index 56d80925161..84303e40a50 100644 --- a/nix/workbench/backend/supervisor.nix +++ b/nix/workbench/backend/supervisor.nix @@ -38,8 +38,10 @@ let materialise-profile = { profileData }: let supervisorConf = import ./supervisor-conf.nix - { inherit profileData; - inherit pkgs lib stateDir; + { inherit pkgs lib stateDir; + # Create a `supervisord.conf` + nodeSpecs = profileData.node-specs.value; + withTracer = profileData.value.node.tracer; inetHttpServerPort = "127.0.0.1:9001"; }; in pkgs.runCommand "workbench-backend-output-${profileData.profileName}-supervisor"