From d2ce18946592dc493f2f8b40021eb09630c3194e Mon Sep 17 00:00:00 2001
From: Federico Mastellone
Date: Mon, 14 Nov 2022 10:53:48 +0000
Subject: [PATCH] workbench: Add nomad backend (single container version)

---
 nix/pkgs.nix                         |  18 +
 nix/workbench/backend/nomad-conf.nix |  81 ++++
 nix/workbench/backend/nomad-run.nix  | 152 +++++++
 nix/workbench/backend/nomad.nix      |  78 ++++
 nix/workbench/backend/nomad.sh       | 641 +++++++++++++++++++++++++++
 nix/workbench/wb                     |   1 +
 6 files changed, 971 insertions(+)
 create mode 100644 nix/workbench/backend/nomad-conf.nix
 create mode 100644 nix/workbench/backend/nomad-run.nix
 create mode 100644 nix/workbench/backend/nomad.nix
 create mode 100644 nix/workbench/backend/nomad.sh

diff --git a/nix/pkgs.nix b/nix/pkgs.nix
index 76d5b443fff..9c99205fe48 100644
--- a/nix/pkgs.nix
+++ b/nix/pkgs.nix
@@ -84,6 +84,9 @@ final: prev: with final; {
   supervisord-workbench-nix =
     { workbench ? pkgs.workbench, ... }@args: pkgs.callPackage ./workbench/backend/supervisor.nix args;
 
+  nomad-workbench =
+    { workbench ? pkgs.workbench, ... }@args: pkgs.callPackage ./workbench/backend/nomad.nix (args // { inherit nix2container; });
+
   all-profiles-json = (workbench.all-profiles{ inherit (supervisord-workbench-nix) backend; }).JSON;
 
   # An instance of the workbench, specialised to the supervisord backend and a profile,
@@ -103,6 +106,21 @@ final: prev: with final; {
         inherit batchName profileName supervisord-workbench cardano-node-rev;
       };
 
+  nomad-workbench-for-profile =
+    { batchName ? customConfig.localCluster.batchName
+    , profileName ? customConfig.localCluster.profileName
+    # FIXME: Makes no sense for this backend!
+    , useCabalRun ? false
+    , workbenchDevMode ? false
+    , profiled ? false
+    , nomad-workbench ? pkgs.callPackage ./workbench/backend/nomad.nix { inherit nix2container; }
+    , cardano-node-rev ? null
+    }:
+    pkgs.callPackage ./workbench/backend/nomad-run.nix
+      {
+        inherit batchName profileName nomad-workbench cardano-node-rev;
+      };
+
   # Disable failing python uvloop tests
   python38 = prev.python38.override {
     packageOverrides = pythonFinal: pythonPrev: {
diff --git a/nix/workbench/backend/nomad-conf.nix b/nix/workbench/backend/nomad-conf.nix
new file mode 100644
index 00000000000..614fa7f7cdf
--- /dev/null
+++ b/nix/workbench/backend/nomad-conf.nix
@@ -0,0 +1,81 @@
{ pkgs
# Cardano packages/executables.
, cardano-node, cardano-tracer, tx-generator
# OCI Image builder.
, nix2container
}:

let

  # Why `nix2container` instead of the built-in `dockerTools`?:
  # - https://lewo.abesis.fr/posts/nix-build-container-image/
  # - https://discourse.nixos.org/t/nix2container-another-dockertools-buildimage-implementation-based-on-skopeo/21688
  n2c = nix2container.outputs.packages.x86_64-linux.nix2container;

  clusterImage = n2c.buildImage {
    name = "registry.workbench.iog.io/cluster";
    # Adds `/etc/protocols` and `/etc/services` to the root directory.
    # FIXME: Inside the container we still can't resolve `localhost`, but we
    # can resolve WAN domains using public DNS servers.
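    # (A possible mitigation, not applied in this patch: also copy a minimal
    # `/etc/hosts` into the image root, e.g. by adding something like
    #   (pkgs.writeTextDir "etc/hosts" "127.0.0.1 localhost")
    # to `copyToRoot`, so glibc could resolve `localhost` without consulting
    # DNS at all. Shown only as a sketch.)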
+ # Running `bash-5.1# /nix/store/*-glibc-#-bin/bin/getent hosts localhost` + # inside the container returns nothing and python stuff like `supervisord` + # breaks: "error: , [Errno -2] Name or service not known: file: /nix/store/hb1lzaisgx2m9n29hqhh6yp6hasplq1v-python3-3.9.10/lib/python3.9/socket.py line: 954" + # Further reading for hints: + # https://stackoverflow.com/questions/39965432/docker-container-unable-to-resolve-localhost + copyToRoot = with pkgs; [ iana-etc ]; + # All these layers are added to /nix/store, nothing is in `$PATH`. + maxLayers = 25; + layers = with pkgs; [ + # Runtime to be able run bash commands from `podman`/`nomad`. + (n2c.buildLayer {deps = [ bashInteractive coreutils ];}) + # Supervisor. + (n2c.buildLayer {deps = [ python3Packages.supervisor ];}) + # Cardano packages. + (n2c.buildLayer {deps = [ cardano-node ];}) + (n2c.buildLayer {deps = [ cardano-tracer ];}) + (n2c.buildLayer {deps = [ tx-generator ];}) + ]; + # OCI container specification: + # https://github.com/opencontainers/image-spec/blob/3a7f492d3f1bcada656a7d8c08f3f9bbd05e7406/specs-go/v1/config.go#L24 + config = { + # Volumes are mounted as user `0:0`, I have no choice here. + User = "0:0"; + # The stanza `WorkingDir` is not used because the config file of + # `supervisord` depends on the working directory. + Entrypoint = + let + entrypoint = pkgs.writeShellApplication { + name = "entrypoint"; + runtimeInputs = with pkgs; [ + coreutils + bashInteractive + python3Packages.supervisor + ]; + text = '' + # The SUPERVISOR_NIX variable must be set + [ -z "''${SUPERVISOR_NIX:-}" ] && echo "SUPERVISOR_NIX env var must be set -- aborting" && exit 1 + + # The SUPERVISORD_CONFIG variable must be set + [ -z "''${SUPERVISORD_CONFIG:-}" ] && echo "SUPERVISORD_CONFIG env var must be set -- aborting" && exit 1 + + # Create a link to the `supervisor` Nix folder. + # First check if already exists to be able to restart containers. + if ! test -e "$SUPERVISOR_NIX" + then + "${pkgs.coreutils}"/bin/ln -s "${pkgs.python3Packages.supervisor}" "$SUPERVISOR_NIX" + fi + + # Start `supervisord` on the foreground. + "${pkgs.python3Packages.supervisor}"/bin/supervisord --nodaemon --configuration "$SUPERVISORD_CONFIG" + ''; + }; + in + [ "${entrypoint}/bin/entrypoint" ]; + }; + }; + +in { + + inherit clusterImage; + +} diff --git a/nix/workbench/backend/nomad-run.nix b/nix/workbench/backend/nomad-run.nix new file mode 100644 index 00000000000..5cc9fde7a0b --- /dev/null +++ b/nix/workbench/backend/nomad-run.nix @@ -0,0 +1,152 @@ +let + batchNameDefault = "plain"; + profileNameDefault = "default-bage"; +in +{ pkgs +, cardanoNodePackages +, nomad-workbench +## +, profileName ? profileNameDefault +, batchName ? batchNameDefault +## +, workbenchDevMode ? false +, cardano-node-rev ? "0000000000000000000000000000000000000000" +}: +let + inherit (nomad-workbench) workbench backend cacheDir stateDir basePort; + + with-nomad-profile = + { envArgsOverride ? {} }: ## TODO: envArgsOverride is not used! 
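    # (A hypothetical way to honour it -- assuming `with-profile` grew an
    #  extra argument for it, which it has not -- would look roughly like:
    #    workbench.with-profile
    #      { inherit backend profileName; envArgs = envArgsOverride; }
    #  shown only as a sketch, not implemented here.)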
+ workbench.with-profile + { inherit backend profileName; }; + + inherit (with-nomad-profile {}) profileNix profile topology genesis; +in + let + + inherit (profile.value) era composition monetary; + + path = pkgs.lib.makeBinPath path'; + path' = + [ cardanoNodePackages.bech32 pkgs.jq pkgs.gnused pkgs.coreutils pkgs.bash pkgs.moreutils + ] + ## In dev mode, call the script directly: + ++ pkgs.lib.optionals (!workbenchDevMode) + [ workbench.workbench ]; + + interactive-start = pkgs.writeScriptBin "start-cluster" '' + set -euo pipefail + + export PATH=$PATH:${path} + unset WB_MODE_CABAL= + wb start \ + --batch-name ${batchName} \ + --profile-name ${profileName} \ + --profile ${profile} \ + --cache-dir ${cacheDir} \ + --base-port ${toString basePort} \ + ''${WB_MODE_CABAL:+--cabal} \ + "$@" + ''; + + interactive-stop = pkgs.writeScriptBin "stop-cluster" '' + set -euo pipefail + + wb finish "$@" + ''; + + interactive-restart = pkgs.writeScriptBin "restart-cluster" '' + set -euo pipefail + + wb run restart "$@" && \ + echo "workbench: alternate command for this action: wb run restart" >&2 + ''; + + nodeBuildProduct = + name: + "report ${name}-log $out ${name}/stdout"; + + profile-run = + { trace ? false }: + let + inherit + (with-nomad-profile + { envArgsOverride = { cacheDir = "./cache"; stateDir = "./"; }; }) + profileNix profile topology genesis; + + run = pkgs.runCommand "workbench-run-nomad-${profileName}" + { requiredSystemFeatures = [ "benchmark" ]; + nativeBuildInputs = with cardanoNodePackages; with pkgs; [ + bash + bech32 + coreutils + gnused + jq + moreutils + nixWrapped + pstree +# TODO: nomad + workbench.workbench + zstd + ]; + } + '' + mkdir -p $out/{cache,nix-support} + cd $out + export HOME=$out + + export WB_BACKEND=nomad + export CARDANO_NODE_SOCKET_PATH=$(wb backend get-node-socket-path ${stateDir} node-0) + + cmd=( + wb + ${pkgs.lib.optionalString trace "--trace"} + start + --profile-name ${profileName} + --profile ${profile} + --topology ${topology} + --genesis-cache-entry ${genesis} + --batch-name smoke-test + --base-port ${toString basePort} + --node-source ${cardanoNodePackages.cardano-node.src.origSrc} + --node-rev ${cardano-node-rev} + --cache-dir ./cache + ) + echo "''${cmd[*]}" > $out/wb-start.sh + + time "''${cmd[@]}" 2>&1 | + tee $out/wb-start.log + + ## Convert structure from $out/run/RUN-ID/* to $out/*: + rm -rf cache + rm -f run/{current,-current} + find $out -type s | xargs rm -f + run=$(cd run; ls) + (cd run; tar c $run --zstd) > archive.tar.zst + mv run/$run/* . + rmdir run/$run run + + + cat > $out/nix-support/hydra-build-products < $out/clusterImageName + echo $clusterImageTag > $out/clusterImageTag + ln -s $clusterImageCopyToPodman/bin/copy-to-podman $out/clusterImageCopyToPodman + ''; + }; +in +{ + inherit cacheDir stateDir basePort; + inherit workbench; + inherit backend; +} diff --git a/nix/workbench/backend/nomad.sh b/nix/workbench/backend/nomad.sh new file mode 100644 index 00000000000..2b37957eafe --- /dev/null +++ b/nix/workbench/backend/nomad.sh @@ -0,0 +1,641 @@ +usage_nomad() { + usage "nomad" "Backend: manages a local cluster using 'nomad' (and 'podman')" </dev/null | grep ':30000 ' | wc -l)" != "0" + ;; + + setenv-defaults ) + local usage="USAGE: wb nomad $op PROFILE-DIR" + local profile_dir=${1:?$usage} + + # Look up `supervisord` config file produced by Nix (run profile). + setenvjqstr 'supervisord_conf' "$profile_dir"/supervisor.conf + # The `--serverurl` argument is needed in every call to `nomad exec`. 
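      # (That is, every `nomad alloc exec ... supervisorctl` call below ends
      #  up being of the roughly equivalent form
      #    supervisorctl --serverurl http://127.0.0.1:9001 \
      #                  --configuration /tmp/cluster/run/current/supervisor/supervisord.conf status
      #  executed inside the container.)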
      # The problem is that if we use "127.0.0.1:9001" as a parameter (without
      # the "http" part) the container returns:
      # error: , Unknown protocol for serverurl 127.0.0.1:9001: file: /nix/store/izqhlj5i1x9ldyn43d02kcy4mafmj3ci-python3.9-supervisor-4.2.4/lib/python3.9/site-packages/supervisor/xmlrpc.py line: 508
      # Without using the `--serverurl` parameter at all (relying on the INI
      # config file's [inet_http_server] port stanza), also without "http://":
      # error: , [Errno -2] Name or service not known: file: /nix/store/hb1lzaisgx2m9n29hqhh6yp6hasplq1v-python3-3.9.10/lib/python3.9/socket.py line: 954
      # And if the "http" part is added to the INI file, when starting
      # `supervisord` inside the container we get (from journald):
      # Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: Error: Cannot open an HTTP server: socket.error reported -2
      # Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: For help, use /nix/store/izqhlj5i1x9ldyn43d02kcy4mafmj3ci-python3.9-supervisor-4.2.4/bin/supervisord -h
      setenvjqstr 'supervisord_url' "http://127.0.0.1:9001"
      # Look up the `cluster` OCI image's name and tag (also from the Nix profile).
      setenvjqstr 'oci_image_name' ${WB_OCI_IMAGE_NAME:-$(cat "$profile_dir/clusterImageName")}
      setenvjqstr 'oci_image_tag' ${WB_OCI_IMAGE_TAG:-$(cat "$profile_dir/clusterImageTag")}
      # Script that creates the OCI image from nix2container layered output.
      setenvjqstr 'oci_image_skopeo_script' "$profile_dir/clusterImageCopyToPodman"
      # Set the cluster's podman container defaults.
      # The workbench expects a specific hierarchy of folders and files.
      setenvjqstr 'container_workdir' "/tmp/cluster/"
      setenvjqstr 'container_mountpoint' "/tmp/cluster/run/current"
      # The `supervisord` binary is installed inside the container but not
      # added to $PATH (it resides in /nix/store), so a desired location is
      # passed to the container as an environment variable to create a symlink
      # to it.
      setenvjqstr 'container_supervisor_nix' "/tmp/cluster/run/current/supervisor/nix-store"
      # The container needs to know where the `supervisord` config file is
      # located so it can be started. This is also passed as an environment
      # variable.
      setenvjqstr 'container_supervisord_conf' "/tmp/cluster/run/current/supervisor/supervisord.conf"
      ;;

    # Man pages for Podman configuration files:
    # https://man.archlinux.org/man/community/podman/podman.1.en
    # https://man.archlinux.org/man/containers.conf.5
    # https://man.archlinux.org/man/containers-storage.conf.5
    # https://man.archlinux.org/man/containers-policy.json.5

    allocate-run )
      local usage="USAGE: wb nomad $op RUN-DIR"
      local dir=${1:?$usage}; shift

      while test $# -gt 0
      do case "$1" in
           --* ) msg "FATAL: unknown flag '$1'"; usage_nomad;;
           * ) break;; esac; shift; done

      # The `genesis/utxo-keys` directory is used as a volume for the
      # `generator` service but it's not always present/created.
      if ! test -e "$dir"/genesis/utxo-keys
      then
        mkdir -p "$dir"/genesis/utxo-keys
      else
        # HACK: UGLY!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        ############### FIXME: Fix it in `genesis.sh` ###############
        # Keep the original keys aside while making sure the directory that
        # will be mounted as a volume exists and contains a copy of them.
        mv "$dir"/genesis/utxo-keys "$dir"/genesis/utxo-keys.bak
        mkdir -p "$dir"/genesis/utxo-keys
        cp -r "$dir"/genesis/utxo-keys.bak/* "$dir"/genesis/utxo-keys/
      fi

      # Populate the files needed by the `supervisord` instance running inside
      # the container.
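      # (For reference: "$dir" is bind-mounted into the container at
      #  /tmp/cluster/run/current, so the file copied below is seen inside the
      #  container as /tmp/cluster/run/current/supervisor/supervisord.conf --
      #  the same value set as 'container_supervisord_conf' above.)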
+ local supervisord_conf=$(envjqr 'supervisord_conf') + mkdir -p "$dir"/supervisor + # If $dir is being mounted inside the container the file must be copied + # because if it references something outside the container's mounted + # volume the container probably won't be able to access it. + cp -f "$supervisord_conf" "$dir"/supervisor/supervisord.conf + + # Create the "cluster" OCI image. + local oci_image_name=$( envjqr 'oci_image_name') + local oci_image_tag=$( envjqr 'oci_image_tag') + local oci_image_skopeo_script=$(envjqr 'oci_image_skopeo_script') + msg "Creating OCI image ..." + # Forced the `overlay` storage driver or podman won't see the image. + # https://docs.podman.io/en/latest/markdown/podman.1.html#note-unsupported-file-systems-in-rootless-mode + STORAGE_DRIVER=overlay "$oci_image_skopeo_script" + # Check that `podman` can see the "cluster" OCI image. + if ! podman image exists "${oci_image_name}:${oci_image_tag}" + then + fatal "OCI image ${oci_image_name}:${oci_image_tag} cannot be found by podman" + else + msg "OCI image named \"${oci_image_name}:${oci_image_tag}\" created" + fi + + # Configure `nomad` and the `podman` plugin/task driver. + nomad_create_folders_and_config "$dir" + msg "Preparing podman API service for nomad driver \`nomad-driver-podman\` ..." + nomad_start_podman_service "$dir" + + # Start `nomad` agent in "-dev-` mode`". + msg "Starting nomad agent ..." + # The Nomad agent is a long running process which runs on every machine + # that is part of the Nomad cluster. The behavior of the agent depends + # on if it is running in client or server mode. Clients are responsible + # for running tasks, while servers are responsible for managing the + # cluster. + # -dev: Start the agent in development mode. This enables a + # pre-configured dual-role agent (client + server) which is useful for + # developing or testing Nomad. No other configuration is required to + # start the agent in this mode, but you may pass an optional + # comma-separated list of mode configurations + nomad agent -config="$dir/nomad/config" -dev -log-level=INFO >> "$dir/nomad/stdout" 2>> "$dir/nomad/stderr" & + echo "$!" > "$dir/nomad/nomad.pid" + setenvjqstr 'nomad_pid' $(cat $dir/nomad/nomad.pid) + msg "Nomad started with PID $(cat $dir/nomad/nomad.pid)" + + # Wait for nomad agent: + msg "Waiting for the listening HTTP server ..." + local i=0 + local patience=25 + until curl -Isf 127.0.0.1:4646 2>&1 | head --lines=1 | grep --quiet "HTTP/1.1" + do printf "%3d" $i; sleep 1 + i=$((i+1)) + if test $i -ge $patience + then echo + progress "nomad agent" "$(red FATAL): workbench: nomad agent: patience ran out after ${patience}s, 127.0.0.1:4646" + backend_nomad stop-cluster "$dir" + fatal "nomad agent startup did not succeed: check logs" + fi + echo -ne "\b\b\b" + done >&2 + + # Create and start the nomad job. + nomad_create_job_file "$dir" + msg "Starting nomad job ..." + # Upon successful job submission, this command will immediately enter + # an interactive monitor. This is useful to watch Nomad's internals make + # scheduling decisions and place the submitted work onto nodes. The + # monitor will end once job placement is done. It is safe to exit the + # monitor early using ctrl+c. + # On successful job submission and scheduling, exit code 0 will be + # returned. If there are job placement issues encountered (unsatisfiable + # constraints, resource exhaustion, etc), then the exit code will be 2. 
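      # (A hypothetical caller could branch on that exit code, e.g.:
      #    nomad job run -verbose "$dir/nomad/job-cluster.hcl" || rc=$?
      #    test "${rc:-0}" -ne 2 || fatal "nomad job placement failed"
      #  -- this script does not do so and simply runs the command below.)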
+ # Any other errors, including client connection issues or internal + # errors, are indicated by exit code 1. + nomad job run -verbose "$dir/nomad/job-cluster.hcl" + # Assuming that `nomad` placement is enough wait. + local nomad_alloc_id=$(nomad job allocs -json cluster | jq -r '.[0].ID') + setenvjqstr 'nomad_alloc_id' "$nomad_alloc_id" + msg "Nomad job allocation ID is: $nomad_alloc_id" + # Show `--status` of `supervisorctl` inside the container. + local supervisord_url=$(envjqr 'supervisord_url') + local container_supervisor_nix=$( envjqr 'container_supervisor_nix') + local container_supervisord_conf=$(envjqr 'container_supervisord_conf') + msg "Supervisor status inside container ..." + # Print the command used for debugging purposes. + msg "'nomad alloc --task cluster exec --task cluster \"$nomad_alloc_id\" \"$container_supervisor_nix\"/bin/supervisorctl --serverurl \"$supervisord_url\" --configuration \"$container_supervisord_conf\" status'" + # Execute the actual command. + nomad alloc exec --task cluster "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" status || true + ;; + + describe-run ) + local usage="USAGE: wb nomad $op RUN-DIR" + local dir=${1:?$usage} + + echo " - Nomad job: $(realpath $dir)/nomad/job-cluster.hcl" + ;; + + # Nomad-specific + service-start ) + local usage="USAGE: wb nomad $op RUN-DIR NODE-NAME" + local dir=${1:?$usage}; shift + local service=${1:?$usage}; shift + + backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster start "$service" + ;; + + # Nomad-specific + service-stop ) + local usage="USAGE: wb nomad $op RUN-DIR NODE-NAME" + local dir=${1:?$usage}; shift + local service=${1:?$usage}; shift + + backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster stop "$service" + ;; + + # Nomad-specific + is-service-running ) + local usage="USAGE: wb nomad $op RUN-DIR DOCKER-SERVICE" + local dir=${1:?$usage}; shift + local service=${1:?$usage}; shift + + backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster status "$service" > /dev/null && true + ;; + + # Nomad-specific + nomad-alloc-exec-supervisorctl ) + local usage="USAGE: wb nomad $op RUN-DIR NODE-NAME" + local dir=${1:?$usage}; shift + local task=${1:?$usage}; shift + local action=${1:?$usage}; shift + + local nomad_alloc_id=$(envjqr 'nomad_alloc_id') + local supervisord_url=$(envjqr 'supervisord_url') + local container_supervisor_nix=$(envjqr 'container_supervisor_nix') + local container_supervisord_conf=$(envjqr 'container_supervisord_conf') + nomad alloc exec --task "$task" "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" "$action" $@ + ;; + + start-node ) + local usage="USAGE: wb nomad $op RUN-DIR NODE-NAME" + local dir=${1:?$usage}; shift + local node=${1:?$usage}; shift + + backend_nomad service-start "$dir" $node + # Always wait for the node to be ready. 
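      # (wait-node, defined below, polls for the node's socket file, i.e. it
      #  is roughly equivalent to looping on
      #    test -S "$dir"/$node/node.socket
      #  until the socket appears or patience runs out.)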
      backend_nomad wait-node "$dir" $node
      ;;

    stop-node )
      local usage="USAGE: wb nomad $op RUN-DIR NODE-NAME"
      local dir=${1:?$usage}; shift
      local node=${1:?$usage}; shift

      backend_nomad service-stop "$dir" $node
      ;;

    wait-node )
      local usage="USAGE: wb nomad $op RUN-DIR [NODE-NAME]"
      local dir=${1:?$usage}; shift
      local node=${1:-$(dirname $CARDANO_NODE_SOCKET_PATH | xargs basename)}; shift
      local socket=$(backend_nomad get-node-socket-path "$dir" $node)

      local patience=$(jq '.analysis.cluster_startup_overhead_s | ceil' $dir/profile.json) i=0
      echo -n "workbench: nomad: waiting ${patience}s for socket of $node: " >&2
      while test ! -S $socket
      do printf "%3d" $i; sleep 1
         i=$((i+1))
         if test $i -ge $patience
         then echo
              progress "nomad" "$(red FATAL): workbench: nomad: patience ran out for $(white $node) after ${patience}s, socket $socket"
              backend_nomad stop-cluster "$dir"
              fatal "$node startup did not succeed: check logs in $(dirname $socket)/stdout & stderr"
         fi
         echo -ne "\b\b\b"
      done >&2
      echo " $node up (${i}s)" >&2
      ;;

    start-nodes )
      local usage="USAGE: wb nomad $op RUN-DIR [HONOR_AUTOSTART=]"
      local dir=${1:?$usage}; shift
      local honor_autostart=${1:-}

      local nodes=($(jq_tolist keys "$dir"/node-specs.json))
      for node in ${nodes[*]}
      do
        if test -n "$honor_autostart"
        then
          if jqtest ".\"$node\".autostart" "$dir"/node-specs.json
          then
            backend_nomad start-node "$dir" "$node"
          fi
        else
          backend_nomad start-node "$dir" "$node"
        fi
      done

      if test ! -v CARDANO_NODE_SOCKET_PATH
      then export CARDANO_NODE_SOCKET_PATH=$(backend_nomad get-node-socket-path "$dir" 'node-0')
      fi
      ;;

    start )
      local usage="USAGE: wb nomad $op RUN-DIR"
      local dir=${1:?$usage}; shift

      if jqtest ".node.tracer" "$dir"/profile.json
      then
        backend_nomad service-start "$dir" tracer
        # Wait for the tracer socket.
        # If the tracer fails here, the rest of the cluster is still brought
        # up without any problems.
        local socket=$(jq -r '.network.contents' "$dir/tracer/config.json")
        local patience=$(jq '.analysis.cluster_startup_overhead_s | ceil' "$dir/profile.json") i=0
        echo -n "workbench: nomad: waiting ${patience}s for socket of tracer: " >&2
        while test ! -S "$dir/tracer/$socket"
        do printf "%3d" $i; sleep 1
           i=$((i+1))
           if test $i -ge $patience
           then echo
                progress "nomad" "$(red FATAL): workbench: nomad: patience ran out for $(white tracer) after ${patience}s, socket $socket"
                backend_nomad stop-cluster "$dir"
                fatal "tracer startup did not succeed: check logs in $dir/tracer/stdout & stderr"
           fi
           echo -ne "\b\b\b"
        done >&2
        echo " tracer up (${i}s)" >&2
      fi
      ;;

    get-node-socket-path )
      local usage="USAGE: wb nomad $op RUN-DIR NODE-NAME"
      local dir=${1:?$usage}
      local node_name=${2:?$usage}

      echo -n $dir/$node_name/node.socket
      ;;

    start-generator )
      local usage="USAGE: wb nomad $op RUN-DIR"
      local dir=${1:?$usage}; shift

      while test $# -gt 0
      do case "$1" in
           --* ) msg "FATAL: unknown flag '$1'"; usage_nomad;;
           * ) break;; esac; shift; done

      backend_nomad service-start "$dir" generator
      ;;

    wait-node-stopped )
      local usage="USAGE: wb nomad $op RUN-DIR NODE"
      local dir=${1:?$usage}; shift
      local node=${1:?$usage}; shift

      progress_ne "nomad" "waiting until $node stops: ....."
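      # (This loop polls once per second through the is-service-running op
      #  above, i.e. `supervisorctl status` via `nomad alloc exec`; no
      #  Nomad-level health check is involved.)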
+ local i=0 + while backend_nomad is-service-running "$dir" "$node" + do + echo -ne "\b\b\b\b\b"; printf "%5d" $i >&2; i=$((i+1)) + sleep 1 + done >&2 + echo -e "\b\b\b\b\bdone, after $(with_color white $i) seconds" >&2 + ;; + + wait-pools-stopped ) + local usage="USAGE: wb nomad $op RUN-DIR" + local dir=${1:?$usage}; shift + + local i=0 pools=$(jq .composition.n_pool_hosts $dir/profile.json) start_time=$(date +%s) + msg_ne "nomad: waiting until all pool nodes are stopped: 000000" + touch $dir/flag/cluster-termination + + for ((pool_ix=0; pool_ix < $pools; pool_ix++)) + do + while backend_nomad is-service-running "$dir" "node-${pool_ix}" && test -f $dir/flag/cluster-termination + do + echo -ne "\b\b\b\b\b\b"; printf "%6d" $((i + 1)); i=$((i+1)) + sleep 1 + done + echo -ne "\b\b\b\b\b\b"; echo -n "node-${pool_ix} 000000" + done >&2 + echo -ne "\b\b\b\b\b\b" + local elapsed=$(($(date +%s) - start_time)) + if test -f $dir/flag/cluster-termination + then echo " All nodes exited -- after $(yellow $elapsed)s" >&2 + else echo " Termination requested -- after $(yellow $elapsed)s" >&2; fi + ;; + + stop-cluster ) + local usage="USAGE: wb nomad $op RUN-DIR" + local dir=${1:?$usage}; shift + + local nomad_alloc_id=$(envjqr 'nomad_alloc_id') + local supervisord_url=$(envjqr 'supervisord_url') + local container_supervisor_nix=$(envjqr 'container_supervisor_nix') + local container_supervisord_conf=$(envjqr 'container_supervisord_conf') + nomad alloc exec --task cluster "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" stop all || true > /dev/null + + nomad job stop -global -no-shutdown-delay -purge -yes cluster + local nomad_pid=$(envjqr 'nomad_pid') + kill -SIGINT "$nomad_pid" + ;; + + cleanup-cluster ) + local usage="USAGE: wb nomad $op RUN-DIR" + local dir=${1:?$usage}; shift + + msg "nomad: resetting cluster state in: $dir" + rm -f $dir/*/std{out,err} $dir/node-*/*.socket $dir/*/logs/* 2>/dev/null || true + rm -fr $dir/node-*/state-cluster/ + # Clean nomad logs. + rm -f $dir/nomad/nomad.log $dir/nomad/std{out,err} + rm -rf $dir/nomad/data/* + ;; + + * ) usage_docker;; esac +} + +# Configure `nomad` and its `podman` plugin / task driver +# (Task Drivers are also called plugins because they are pluggable). +# +# WARNING: `podman`/`skopeo` are run using default parameters. Every workbench +# user is responsible for its local/global configurations. +# TODO: Unless this breaks reproducibility and with every call config files +# and parameters need to be overriden. +# For example: +# Local version of /etc/containers/containers.conf +# mkdir -p $HOME/.config/containers/ +# touch $HOME/.config/containers/containers.conf +# CONTAINERS_CONF=$HOME/.config/containers/containers.conf +# Local version of /etc/containers/storage.conf +# https://www.mankier.com/5/containers-storage.conf +# mkdir -p $HOME/.local/share/containers/storage/volumes +# touch $HOME/.config/containers/storage.conf +# CONTAINERS_STORAGE_CONF=$HOME/.config/containers/storage.conf +# Local version of /etc/containers/policy.json +# https://www.mankier.com/5/containers-policy.json +# mkdir -p $HOME/.config/containers/ +# touch $HOME/.config/containers/policy.json +nomad_create_folders_and_config() { + local dir=$1 + # Folders: + mkdir -p "$dir/nomad/config" + mkdir -p "$dir/nomad/data" + mkdir -p "$dir/nomad/data/plugins" + # Podman Task Driver - Client Requirements: + # "Ensure that Nomad can find the plugin, refer to `plugin_dir`." 
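  # (Here that requirement is satisfied by symlinking the `nomad-driver-podman`
  #  binary found on $PATH into "$dir/nomad/data/plugins", which the generated
  #  config below sets as `plugin_dir`.)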
+ # https://www.nomadproject.io/plugins/drivers/podman#client-requirements + ln -s "$(which nomad-driver-podman)" "$dir/nomad/data/plugins/nomad-driver-podman" + # Config: + # - `nomad` configuration docs: + # - - https://www.nomadproject.io/docs/configuration + # - Generic `nomad` plugins / task drivers configuration docs: + # - - https://www.nomadproject.io/plugins/drivers + # - - https://www.nomadproject.io/docs/configuration/plugin + # - Specific `nomad` `podman` plugin / task driver configuration docs: + # - - https://www.nomadproject.io/plugins/drivers/podman#plugin-options + # - - https://github.com/hashicorp/nomad-driver-podman#driver-configuration + cat > "$dir/nomad/config/nomad.hcl" <<- EOF + region = "workbench" + datacenter = "workbench" + name = "workbench" + data_dir = "$dir/nomad/data" + plugin_dir = "$dir/nomad/data/plugins" + bind_addr = "127.0.0.1" + ports = { + http = 4646 + } + log_level = "INFO" + log_json = true + log_file = "$dir/nomad/" + leave_on_interrupt = true + leave_on_terminate = true + plugin "nomad-driver-podman" { + args = [] + config { + # TODO: Use custom socket location! + # socket_path = "unix:$dir/nomad/podman.sock" + volumes { + enabled = true + } + } + } +EOF +} + +# Start the `podman` API service needed by `nomad`. +nomad_start_podman_service() { + local dir=$1 + # TODO: Use custom socket location! + # podman --url "unix:$dir/nomad/podman.sock" system service --time 60 "unix:$dir/nomad/podman.sock" & + local socket="/run/user/$UID/podman/podman.sock" +# if test -S "$socket" +# then +# msg "Podman API service was already running" +# else + # The session is kept open waiting for a new connection for 60 seconds. + # https://discuss.hashicorp.com/t/nomad-podman-rhel8-driver-difficulties/21877/4 + # `--time`: Time until the service session expires in seconds. Use 0 + # to disable the timeout (default 5). + podman system service --time 60 & + local i=0 + local patience=5 + while test ! -S "$socket" + do printf "%3d" $i; sleep 1 + i=$((i+1)) + if test $i -ge $patience + then echo + progress "nomad-driver-podman" "$(red FATAL): workbench: nomad-driver-podman: patience ran out after ${patience}s, socket $socket" + backend_nomad stop-cluster "$dir" + fatal "nomad-driver-podman startup did not succeed: check logs" + fi + echo -ne "\b\b\b" + done >&2 +# fi + msg "Podman API service started" +} + +# Need to use HCL instead of JSON. The only workaround is to send commands to +# Nomad using `curl` instead of the command line (`nomad job ...`). +# - "The nomad job run command currently accepts only HCL" +# [https://github.com/hashicorp/nomad/issues/6758#issuecomment-794116722] +nomad_create_job_file() { + local dir=$1 + local container_mountpoint=$( envjqr 'container_mountpoint') + # If CARDANO_MAINNET_MIRROR is present attach it as a volume. + if test -n "$CARDANO_MAINNET_MIRROR" + then + # The nix-store path contains 3 levels of symlinks. This is a hack to + # avoid creating a container image with all these files. + local immutable_store=$(readlink -f "$CARDANO_MAINNET_MIRROR"/immutable) + local optional_volumes="[ + \"$CARDANO_MAINNET_MIRROR:$CARDANO_MAINNET_MIRROR:ro\" + , \"$immutable_store:$immutable_store:ro\" + $(find -L "$immutable_store" -type f -exec realpath {} \; | xargs dirname | sort | uniq | xargs -I "{}" echo ", \"{}:{}:ro\"") + ]" + else + local optional_volumes="[]" + fi + # Volumes + local jq_filter=" + [ + \"${dir}:/tmp/cluster/run/current:rw,exec\" + ] + + + ( . | keys | map( \"${dir}/genesis:${container_mountpoint}/\" + . 
+ \"/genesis:ro\" ) ) + + + ( . | keys | map( \"${dir}/\" + . + \":${container_mountpoint}/generator/\" + . + \":ro\" ) ) + + + ( . | keys | map( \"${dir}/genesis:${container_mountpoint}/generator/\" + . + \"/genesis:ro\" ) ) + + + [ + \"${dir}/genesis:${container_mountpoint}/generator/genesis:ro\" + , \"${dir}/genesis/utxo-keys:${container_mountpoint}/generator/genesis/utxo-keys:ro\" + ] + + + \$optional_volumes + " + local podman_volumes=$(jq "$jq_filter" --argjson optional_volumes "$optional_volumes" "$dir"/profile/node-specs.json) + # Create the task to run in `nomad` using `podman` driver. + # https://www.nomadproject.io/docs/job-specification + # https://www.nomadproject.io/docs/job-specification/job + # https://github.com/hashicorp/nomad-driver-podman#task-configuration +cat > "$dir/nomad/job-cluster.hcl" <<- EOF +job "cluster" { + region = "workbench" + datacenters = [ "workbench" ] + type = "service" + reschedule { + attempts = 0 + unlimited = false + } + # A group defines a series of tasks that should be co-located + # on the same client (host). All tasks within a group will be + # placed on the same host. + group "cluster" { + restart { + attempts = 0 + mode = "fail" + } + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. + # https://developer.hashicorp.com/nomad/docs/job-specification/network + network { + mode = "host" + } +EOF + local task_stanza_name="cluster" + local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl" + nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes" +cat "$task_stanza_file" >> "$dir/nomad/job-cluster.hcl" + + local task_stanza_name="cluster2" + local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl" + nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes" +cat "$task_stanza_file" >> "$dir/nomad/job-cluster.hcl" + +cat >> "$dir/nomad/job-cluster.hcl" <<- EOF + } +} +EOF +} + +nomad_create_task_stanza() { + local file=$1 + local name=$2 + local podman_volumes=$3 + local oci_image_name=$( envjqr 'oci_image_name') + local oci_image_tag=$( envjqr 'oci_image_tag') + local container_workdir=$( envjqr 'container_workdir') + local container_supervisor_nix=$( envjqr 'container_supervisor_nix') + local container_supervisord_conf=$(envjqr 'container_supervisord_conf') + cat > "$file" <<- EOF +# The task stanza creates an individual unit of work, such as a +# Docker container, web application, or batch processing. +task "$name" { + driver = "podman" + config { + image = "${oci_image_name}:${oci_image_tag}" + force_pull = false + # TODO/FIXME: Don't know how to make podman log to nomad + # instead of journald. + # No argument or block type is named \"logging\" + #logging = { + # driver = "nomad" + #} + tmpfs = [ + "/tmp" + ] + volumes = ${podman_volumes} + working_dir = "${container_workdir}" + hostname = "$name" + } + env = { + SUPERVISOR_NIX = "${container_supervisor_nix}" + SUPERVISORD_CONFIG = "${container_supervisord_conf}" + } + # Avoid: podman WARN[0066] StopSignal SIGTERM failed to stop container + # cluster-XX in 5 seconds, resorting to SIGKILL + kill_timeout = 15 +} +EOF +} diff --git a/nix/workbench/wb b/nix/workbench/wb index 30c42ee86e8..69e29d41416 100755 --- a/nix/workbench/wb +++ b/nix/workbench/wb @@ -20,6 +20,7 @@ global_basedir=${global_basedir:-$(realpath "$(dirname "$0")")} . "$global_basedir"/explain-mode.sh . "$global_basedir"/backend.sh +. 
"$global_basedir"/backend/nomad.sh . "$global_basedir"/backend/supervisor.sh usage_main() {