diff --git a/.travis.yml b/.travis.yml index d376cbd62..cf9ce3d0d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,11 +48,13 @@ matrix: - git config --global user.name $GITHUB_USERNAME - git config --global user.email "$GITHUB_USERNAME@gmail.com" script: - - python3 tools/xml2palette/xml2palette.py -i ./ -o $PROJECT_NAME-$TRAVIS_BRANCH.palette + - python3 tools/xml2palette/xml2palette.py -i ./ -t daliuge -o $PROJECT_NAME-$TRAVIS_BRANCH.palette + - python3 tools/xml2palette/xml2palette.py -i ./ -t template -o $PROJECT_NAME-$TRAVIS_BRANCH-template.palette after_success: - git clone https://$GITHUB_TOKEN@github.com/ICRAR/EAGLE_test_repo - mkdir -p EAGLE_test_repo/$PROJECT_NAME - mv $PROJECT_NAME-$TRAVIS_BRANCH.palette EAGLE_test_repo/$PROJECT_NAME/ + - mv $PROJECT_NAME-$TRAVIS_BRANCH-template.palette EAGLE_test_repo/$PROJECT_NAME/ - cd EAGLE_test_repo - git add * - git diff-index --quiet HEAD || git commit -m "Automatically generated DALiuGE palette (branch $TRAVIS_BRANCH, commit $PROJECT_VERSION)" diff --git a/OpenAPI/tests/test.graph b/OpenAPI/tests/test.graph index bfa04f123..542b68a55 100644 --- a/OpenAPI/tests/test.graph +++ b/OpenAPI/tests/test.graph @@ -204,7 +204,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -306,7 +306,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -388,7 +388,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], diff --git a/daliuge-common/dlg/common/__init__.py b/daliuge-common/dlg/common/__init__.py index 871462d03..3198ce50a 100644 --- a/daliuge-common/dlg/common/__init__.py +++ b/daliuge-common/dlg/common/__init__.py @@ -39,6 +39,7 @@ class Categories: PLASMA = "Plasma" PLASMAFLIGHT = "PlasmaFlight" PARSET = "ParameterSet" + ENVIRONMENTVARS = "EnvironmentVars" MKN = "MKN" SCATTER = "Scatter" @@ -72,7 +73,8 @@ class Categories: Categories.JSON, Categories.PLASMA, Categories.PLASMAFLIGHT, - Categories.PARSET + Categories.PARSET, + Categories.ENVIRONMENTVARS } APP_DROP_TYPES = [ Categories.COMPONENT, diff --git a/daliuge-common/docker/Dockerfile.dev b/daliuge-common/docker/Dockerfile.dev index dd7c0a51b..d309e5ab5 100644 --- a/daliuge-common/docker/Dockerfile.dev +++ b/daliuge-common/docker/Dockerfile.dev @@ -7,18 +7,13 @@ FROM ubuntu:20.04 ARG BUILD_ID LABEL stage=builder LABEL build=$BUILD_ID -RUN apt-get update && apt-get install -y gcc python3 python3.8-venv && apt-get clean +RUN apt-get update && apt-get install -y gcc python3 python3.8-venv python3-pip python3-distutils libmetis-dev curl && apt-get clean COPY / /daliuge -RUN cd && python3 -m venv dlg && cd /daliuge && \ - . ${HOME}/dlg/bin/activate && \ - pip install numpy && \ - pip install . && \ - apt-get remove -y gcc && \ - apt-get autoremove -y +RUN cd / && python3 -m venv dlg && cd /daliuge && \ + . /dlg/bin/activate && \ + pip install wheel numpy && \ + pip install . 
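Note on the `.travis.yml` change at the top of this diff: CI now runs xml2palette twice, once with `-t daliuge` and once with `-t template`, so components carrying the new `# @param tag` field (added throughout this diff) land in separate palette files. xml2palette's internals are not part of this diff; the following is only a minimal sketch of the idea, with made-up component entries:

```python
# Hypothetical component records, as a palette generator might collect them
components = [
    {"name": "HelloWorldApp", "tag": "daliuge"},
    {"name": "BashShellApp", "tag": "template"},
]

def build_palette(components, tag):
    """Keep only the components whose doxygen block carries the requested tag."""
    return [c for c in components if c["tag"] == tag]

daliuge_palette = build_palette(components, "daliuge")    # -> $PROJECT_NAME-$TRAVIS_BRANCH.palette
template_palette = build_palette(components, "template")  # -> ...-template.palette
```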
-
-FROM ubuntu:20.04
-RUN apt-get update && apt-get install -y bash
-COPY --from=0 /root/dlg /root/dlg
+# we don't clean this up, will be done in the derived images
\ No newline at end of file
diff --git a/daliuge-engine/dlg/apps/archiving.py b/daliuge-engine/dlg/apps/archiving.py
index 99420052f..0e1d01a18 100644
--- a/daliuge-engine/dlg/apps/archiving.py
+++ b/daliuge-engine/dlg/apps/archiving.py
@@ -81,6 +81,7 @@ def store(self, inputDrop):
 # @details Takes an input and archives it in an NGAS server.
 # @par EAGLE_START
 # @param category PythonApp
+# @param tag daliuge
 # @param[in] param/appclass Application class/dlg.apps.archiving.NgasArchivingApp/String/readonly/
 #     \~English Application class
 # @param[in] param/ngasSrv NGAS Server URL/localhost/String/readwrite/
diff --git a/daliuge-engine/dlg/apps/bash_shell_app.py b/daliuge-engine/dlg/apps/bash_shell_app.py
index b28bc3da4..ccf68f6c5 100644
--- a/daliuge-engine/dlg/apps/bash_shell_app.py
+++ b/daliuge-engine/dlg/apps/bash_shell_app.py
@@ -157,17 +157,28 @@ class BashShellBase(object):
     Common class for BashShell apps. It simply requires a command to be specified.
     """
-
+    # TODO: use the shlex module for most of the construction of the
+    # command line to get a proper and safe shell syntax
     command = dlg_string_param("Bash command", None)
 
     def initialize(self, **kwargs):
         super(BashShellBase, self).initialize(**kwargs)
         self.proc = None
+        self._inputRedirect = self._getArg(kwargs, "input_redirection", "")
+        self._outputRedirect = self._getArg(kwargs, "output_redirection", "")
+        self._cmdLineArgs = self._getArg(kwargs, "command_line_arguments", "")
+        self._applicationArgs = self._getArg(kwargs, "applicationArgs", {})
+        self._argumentPrefix = self._getArg(kwargs, "argumentPrefix", "--")
+        self._paramValueSeparator = self._getArg(kwargs, \
+            "paramValueSeparator", " ")
+
         if not self.command:
-            raise InvalidDropException(
-                self, "No command specified, cannot create BashShellApp"
-            )
+            self.command = self._getArg(kwargs, "command", None)
+            if not self.command:
+                raise InvalidDropException(
+                    self, "No command specified, cannot create BashShellApp"
+                )
 
     def _run_bash(self, inputs, outputs, stdin=None, stdout=subprocess.PIPE):
         """
@@ -186,7 +197,16 @@ def _run_bash(self, inputs, outputs, stdin=None, stdout=subprocess.PIPE):
         session_id = (
             self._dlg_session.sessionId if self._dlg_session is not None else ""
         )
-        cmd = self.command
+        argumentString = droputils.serialize_applicationArgs(self._applicationArgs, \
+            self._argumentPrefix, self._paramValueSeparator)
+        # complete command including all additional parameters and optional redirects
+        cmd = f"{self.command} {argumentString} {self._cmdLineArgs} "
+        if self._outputRedirect:
+            cmd = f"{cmd} > {self._outputRedirect}"
+        if self._inputRedirect:
+            cmd = f"cat {self._inputRedirect} | {cmd}"
+        cmd = cmd.strip()
+
         app_uid = self.uid
 
         # self.run_bash(self._command, self.uid, session_id, *args, **kwargs)
@@ -210,7 +230,7 @@ def _run_bash(self, inputs, outputs, stdin=None, stdout=subprocess.PIPE):
 
         # Wrap everything inside bash
         cmd = ("/bin/bash", "-c", cmd)
-        logger.debug("Command after user creation and wrapping is: %s", cmd)
+        logger.debug("Command after wrapping is: %s", cmd)
 
         start = time.time()
 
@@ -305,6 +325,21 @@ def execute(self, data):
 # * input-only stream
 # * full-stream
 #
+##
+# @brief BashShellApp
+# @details An application component able to run an arbitrary command within the Bash Shell
+# @par EAGLE_START
+# @param category BashShellApp
+# @param tag template
+# @param[in] param/command 
Command//String/readwrite/ +# \~English The command to be executed +# @param[in] param/input_redirection Input Redirection//String/readwrite/ +# \~English The command line argument that specifies the input into this application +# @param[in] param/output_redirection Output Redirection//String/readwrite/ +# \~English The command line argument that specifies the output from this application +# @param[in] param/command_line_arguments Command Line Arguments//String/readwrite/ +# \~English Additional command line arguments to be added to the command line to be executed +# @par EAGLE_END class BashShellApp(BashShellBase, BarrierAppDROP): """ An app that runs a bash command in batch mode; that is, it waits until all diff --git a/daliuge-engine/dlg/apps/crc.py b/daliuge-engine/dlg/apps/crc.py index c3dedfa60..9a692042f 100644 --- a/daliuge-engine/dlg/apps/crc.py +++ b/daliuge-engine/dlg/apps/crc.py @@ -78,6 +78,7 @@ def run(self): # i.e. A "streamingConsumer" of its predecessor in the graph # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.crc.CRCStreamApp/String/readonly/ # \~English Application class # @param[out] port/data Data/String/ diff --git a/daliuge-engine/dlg/apps/dockerapp.py b/daliuge-engine/dlg/apps/dockerapp.py index bec25b7bd..60af7cf83 100644 --- a/daliuge-engine/dlg/apps/dockerapp.py +++ b/daliuge-engine/dlg/apps/dockerapp.py @@ -67,6 +67,31 @@ def waitForIp(self, timeout=None): return self._uid, self._containerIp +## +# @brief Docker +# @details +# @par EAGLE_START +# @param category Docker +# @param tag template +# @param[in] param/image Image//String/readwrite/ +# \~English The name of the docker image to be used for this application +# @param[in] param/tag Tag/1.0/String/readwrite/ +# \~English The tag of the docker image to be used for this application +# @param[in] param/digest Digest//String/readwrite/ +# \~English The hexadecimal hash (long version) of the docker image to be used for this application +# @param[in] param/command Command//String/readwrite/ +# \~English The command line to run within the docker instance. The specified command will be executed in a bash shell. That means that images will need a bash shell. +# @param[in] param/user User//String/readwrite/ +# \~English Username of the user who will run the application within the docker image +# @param[in] param/ensureUserAndSwitch Ensure User And Switch/False/Boolean/readwrite/ +# \~English Make sure the user specified in the User parameter exists and then run the docker container as that user +# @param[in] param/removeContainer Remove Container/True/Boolean/readwrite/ +# \~English Instruct Docker engine to delete the container after execution is complete +# @param[in] param/additionalBindings Additional Bindings//String/readwrite/ +# \~English Directories which will be visible inside the container during run-time. Format is srcdir_on_host:trgtdir_on_container. Multiple entries can be separated by commas. +# @param[in] param/portMappings Port Mappings//String/readwrite/ +# \~English Port mappings on the host machine +# @par EAGLE_END class DockerApp(BarrierAppDROP): """ A BarrierAppDROP that represents a process running in a container @@ -204,17 +229,28 @@ def initialize(self, **kwargs): ) self._command = self._getArg(kwargs, "command", None) + if not self._command: logger.warning( "No command specified. 
Assume that a default command is executed in the container" ) + # The above also means that we can't pass applicationArgs # raise InvalidDropException( # self, "No command specified, cannot create DockerApp") + else: + self._applicationArgs = self._getArg(kwargs, "applicationArgs", {}) + + # construct the actual command line from all application parameters + argumentPrefix = self._getArg(kwargs, "argumentPrefix", "--") + argumentString = droputils.serialize_applicationArgs(self._applicationArgs, \ + argumentPrefix) + self._command = f"{self._command} {argumentString}" # The user used to run the process in the docker container # By default docker containers run as root, but we don't want to run # a process using a different user because otherwise anything that that # process writes to the filesystem + # TODO: User switching should be changed to be transparent self._user = self._getArg(kwargs, "user", None) # In some cases we want to make sure the command in the container runs diff --git a/daliuge-engine/dlg/apps/dynlib.py b/daliuge-engine/dlg/apps/dynlib.py index 0bde5e48b..87d814d56 100644 --- a/daliuge-engine/dlg/apps/dynlib.py +++ b/daliuge-engine/dlg/apps/dynlib.py @@ -348,6 +348,15 @@ def addStreamingInput(self, streamingInputDrop, back=True): self._c_app.n_streaming_inputs += 1 +## +# @brief DynlibApp +# @details An application component run from a dynamic library +# @par EAGLE_START +# @param category DynlibApp +# @param tag template +# @param[in] param/libpath Library Path//String/readwrite/ +# \~English The location of the shared object/DLL that implements this application +# @par EAGLE_END class DynlibApp(DynlibAppBase, BarrierAppDROP): """Loads a dynamic library into the current process and runs it""" diff --git a/daliuge-engine/dlg/apps/mpi.py b/daliuge-engine/dlg/apps/mpi.py index 1835000c9..fbf43ff4a 100644 --- a/daliuge-engine/dlg/apps/mpi.py +++ b/daliuge-engine/dlg/apps/mpi.py @@ -32,7 +32,15 @@ logger = logging.getLogger(__name__) - +## +# @brief MPI +# @details An application component using the Message Passing Interface (MPI) +# @par EAGLE_START +# @param category Mpi +# @param tag template +# @param[in] param/num_of_procs Num procs//Integer/readwrite/ +# \~English Number of processes used for this application +# @par EAGLE_END class MPIApp(BarrierAppDROP): """ An application drop representing an MPI job. diff --git a/daliuge-engine/dlg/apps/plasma.py b/daliuge-engine/dlg/apps/plasma.py index 928267b7d..54e173d39 100644 --- a/daliuge-engine/dlg/apps/plasma.py +++ b/daliuge-engine/dlg/apps/plasma.py @@ -48,6 +48,7 @@ # via Plasma. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/plasma_path Plasma Path//String/readwrite/ # \~English Path to plasma store. # @param[in] param/appclass Application class/dlg.apps.plasma.MSStreamingPlasmaConsumer/String/readonly/ @@ -135,6 +136,7 @@ def dropCompleted(self, uid, drop_state): # via Plasma. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/plasma_path Plasma Path//String/readwrite/ # \~English Path to plasma store # @param[in] param/appclass Application class/dlg.apps.plasma.MSStreamingPlasmaProducer/String/readonly/ @@ -203,6 +205,7 @@ def run(self): # @details Batch read entire Measurement Set from Plasma. 
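The BashShellBase and DockerApp changes earlier in this diff both build their final command line by serializing `applicationArgs` with an argument prefix and a parameter/value separator. `droputils.serialize_applicationArgs` itself is not shown here, so the sketch below is only a behavioural approximation using a hypothetical stand-in; the example command and arguments are made up:

```python
def serialize_args(application_args: dict, prefix="--", separator=" ") -> str:
    """Stand-in for droputils.serialize_applicationArgs (not shown in this diff)."""
    return " ".join(f"{prefix}{name}{separator}{value}"
                    for name, value in application_args.items())

cmd = "wget"
argument_string = serialize_args({"timeout": 10, "tries": 2})
# BashShellBase then appends extra command line arguments and applies the
# optional redirects before wrapping everything in /bin/bash -c
cmd = f"{cmd} {argument_string} http://example.com".strip()
# -> 'wget --timeout 10 --tries 2 http://example.com'
```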
# @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application class/dlg.apps.plasma.MSPlasmaReader/String/readonly/ # \~English Application class # @param[in] port/plasma_ms_input Plasma MS Input/Measurement Set/ @@ -265,6 +268,7 @@ def run(self, **kwargs): # @details Batch write entire Measurement Set to Plasma. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application class/dlg.apps.plasma.MSPlasmaWriter/String/readonly/ # \~English Application class # @param[in] port/input_ms Input MS/Measurement Set/ diff --git a/daliuge-engine/dlg/apps/pyfunc.py b/daliuge-engine/dlg/apps/pyfunc.py index bc9a564d5..7e7b3aecf 100644 --- a/daliuge-engine/dlg/apps/pyfunc.py +++ b/daliuge-engine/dlg/apps/pyfunc.py @@ -107,6 +107,7 @@ def import_using_code(code): # being written to its corresponding output. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.pyfunc.PyFuncApp/String/readonly/ # \~English Application class # @param[in] param/func_name Function Name//String/readwrite/ diff --git a/daliuge-engine/dlg/apps/scp.py b/daliuge-engine/dlg/apps/scp.py index 9451d717b..b219f6e8e 100644 --- a/daliuge-engine/dlg/apps/scp.py +++ b/daliuge-engine/dlg/apps/scp.py @@ -45,6 +45,7 @@ # single output via SSH's scp protocol. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.scp.ScpApp/String/readonly/ # \~English Application class # @param[in] param/remoteUser Remote User//String/readwrite/ diff --git a/daliuge-engine/dlg/apps/simple.py b/daliuge-engine/dlg/apps/simple.py index 5de519743..6e7c360f9 100644 --- a/daliuge-engine/dlg/apps/simple.py +++ b/daliuge-engine/dlg/apps/simple.py @@ -34,13 +34,13 @@ from dlg import droputils, utils from dlg.drop import BarrierAppDROP, BranchAppDrop, ContainerDROP from dlg.meta import ( - dlg_float_param, + dlg_float_param, dlg_string_param, - dlg_bool_param, + dlg_bool_param, dlg_int_param, - dlg_component, + dlg_component, dlg_batch_input, - dlg_batch_output, + dlg_batch_output, dlg_streaming_input ) from dlg.exceptions import DaliugeException @@ -68,6 +68,7 @@ def run(self): # without executing real algorithms. Very useful for debugging. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/sleepTime Sleep Time/5/Integer/readwrite/ # \~English The number of seconds to sleep # @param[in] param/appclass Application Class/dlg.apps.simple.SleepApp/String/readonly/ @@ -101,6 +102,7 @@ def run(self): # content recursively. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.simple.CopyApp/String/readonly/ # \~English Application class # @par EAGLE_END @@ -151,6 +153,7 @@ def run(self): # The resulting array will be send to all connected output apps. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/size Size/100/Integer/readwrite/ # \~English The size of the array # @param[in] param/integer Integer/True/Boolean/readwrite/ @@ -230,6 +233,7 @@ def _getArray(self): # will also be send to all connected output apps. 
# @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/method Method/mean/String/readwrite/ # \~English The method used for averaging # @param[in] param/appclass Application Class/dlg.apps.simple.AverageArraysApp/String/readonly/ @@ -329,6 +333,7 @@ class GenericNpyGatherApp(BarrierAppDROP): """ A BarrierAppDrop that reduces then gathers one or more inputs using cummulative operations. function: string <['sum']|'prod'|'min'|'max'|'add'|'multiply'|'maximum'|'minimum'>. + """ component_meta = dlg_component( "GenericNpyGatherApp", @@ -341,6 +346,7 @@ class GenericNpyGatherApp(BarrierAppDROP): # reduce and combine operation pair names functions = { # reduce and gather e.g. output dimension is reduces + "sum": "add", # sum reduction of inputs along an axis first then reduces across drops "prod": "multiply", # prod reduction of inputs along an axis first then reduces across drops "max": "maximum", # max reduction of input along an axis first then reduces across drops @@ -351,6 +357,7 @@ class GenericNpyGatherApp(BarrierAppDROP): "multiply": None, # elementwise multiplication of inputs, ndarrays must be of same shape "maximum": None, # elementwise maximums of inputs, ndarrays must be of same shape "minimum": None # elementwise minimums of inputs, ndarrays must be of same shape + } function: str = dlg_string_param("function", "sum") reduce_axes: str = dlg_string_param("reduce_axes", "None") @@ -402,6 +409,7 @@ def combine_inputs(self): # the same message. App does not require any input. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/greet Greet/World/String/readwrite/ # \~English What appears after 'Hello ' # @param[in] param/appclass Application Class/dlg.apps.simple.HelloWorldApp/String/readonly/ @@ -454,6 +462,7 @@ def run(self): # it to all outputs. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/url URL/"https://eagle.icrar.org"/String/readwrite/ # \~English The URL to retrieve # @param[in] param/appclass Application Class/dlg.apps.simple.UrlRetrieveApp/String/readonly/ @@ -505,6 +514,7 @@ def run(self): # resulting array. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.simple.GenericScatterApp/String/readonly/ # \~English Application class # @param[out] port/array Array/Array/ @@ -563,6 +573,7 @@ def run(self): # resulting array. # @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.simple.GenericNpyScatterApp/String/readonly/ # \~English Application class # @param[in] param/scatter_axes Scatter Axes/String/readwrite @@ -638,9 +649,8 @@ def condition(self): # since this operation will not yield. # The resulting array will be sent to all connected output apps. # @par EAGLE_START -# @param gitrepo $(GIT_REPO) -# @param version $(PROJECT_VERSION) # @param category PythonApp +# @param tag daliuge # @param[in] param/size/100/Integer/readwrite # \~English the size of the array\n # @param[in] param/appclass/dlg.apps.simple.ListAppendThrashingApp/String/readonly diff --git a/daliuge-engine/dlg/apps/socket_listener.py b/daliuge-engine/dlg/apps/socket_listener.py index b1671b2fd..c56f41374 100644 --- a/daliuge-engine/dlg/apps/socket_listener.py +++ b/daliuge-engine/dlg/apps/socket_listener.py @@ -56,6 +56,7 @@ # so data can be written into them through the framework. 
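For the GenericNpyGatherApp function table above: each named function either pairs a per-input reduction with a cross-input combine step ("sum", "prod", "max", "min") or combines same-shape inputs elementwise ("add", "multiply", "maximum", "minimum"). A minimal numpy sketch of those semantics, with made-up input arrays:

```python
import functools
import numpy as np

inputs = [np.arange(6).reshape(2, 3) for _ in range(3)]  # one ndarray per input drop

# "sum": reduce each input along an axis first, then gather across drops via "add"
reduced = [np.sum(arr, axis=0) for arr in inputs]
gathered = functools.reduce(np.add, reduced)             # shape (3,)

# "add": no reduction pair; elementwise addition of same-shape inputs
elementwise = functools.reduce(np.add, inputs)           # shape (2, 3)
```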
# @par EAGLE_START # @param category PythonApp +# @param tag daliuge # @param[in] param/appclass Application Class/dlg.apps.socket_listener.SocketListener/String/readonly/ # \~English Application class # @param[in] param/host Host/127.0.0.1/String/readwrite/ diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 25bd56f52..838956714 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -28,234 +28,51 @@ import datetime import optparse -import os import pwd import re import socket -import string -import subprocess import sys import time -import json +import os -from dlg import utils -from dlg.deploy.configs import * # get all available configurations -from dlg.runtime import __git_version__ as git_commit +from dlg.deploy.configs import ConfigFactory # get all available configurations +from deployment_constants import DEFAULT_AWS_MON_PORT, DEFAULT_AWS_MON_HOST +from slurm_client import SlurmClient -default_aws_mon_host = "sdp-dfms.ddns.net" # TODO: need to change this -default_aws_mon_port = 8898 +FACILITIES = ConfigFactory.available() -facilities = ConfigFactory.available() -class SlurmClient(object): +def get_timestamp(line): """ - parameters we can control: - - 1. user group / account name (Required) - 2. whether to submit a graph, and if so provide graph path - 3. # of nodes (of Drop Managers) - 4. how long to run - 5. whether to produce offline graph vis - 6. whether to attach proxy for remote monitoring, and if so provide - DLG_MON_HOST - DLG_MON_PORT - 7. Root directory of the Log files (Required) + microsecond precision """ + split = line.split() + date_time = "{0}T{1}".format(split[0], split[1]) + pattern = "%Y-%m-%dT%H:%M:%S,%f" + epoch = time.mktime(time.strptime(date_time, pattern)) + return datetime.datetime.strptime(date_time, pattern).microsecond / 1e6 + epoch - def __init__( - self, - log_root=None, - acc=None, - physical_graph_template_data=None, # JSON formatted physical graph template - logical_graph=None, - job_dur=30, - num_nodes=None, - run_proxy=False, - mon_host=default_aws_mon_host, - mon_port=default_aws_mon_port, - logv=1, - facility=None, - zerorun=False, - max_threads=0, - sleepncopy=False, - num_islands=None, - all_nics=False, - check_with_session=False, - submit=True, - pip_name=None, - ): - self._config = ConfigFactory.create_config(facility=facility) - self._acc = self._config.getpar("acc") if (acc is None) else acc - self._log_root = ( - self._config.getpar("log_root") if (log_root is None) else log_root - ) - self.modules = self._config.getpar("modules") - self._num_nodes = num_nodes - self._job_dur = job_dur - self._logical_graph = logical_graph - self._physical_graph_template_data = physical_graph_template_data - self._visualise_graph = False - self._run_proxy = run_proxy - self._mon_host = mon_host - self._mon_port = mon_port - self._pip_name = pip_name - self._logv = logv - self._zerorun = zerorun - self._max_threads = max_threads - self._sleepncopy = sleepncopy - self._num_islands = num_islands - self._all_nics = all_nics - self._check_with_session = check_with_session - self._submit = submit - self._dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f - self._set_name_and_nodenumber() - - def _set_name_and_nodenumber(self): - """ - Given the physical graph data extract the graph name and the total number of - nodes. 
We are not making a decision whether the island managers are running - on separate nodes here, thus the number is the sum of all island - managers and node managers. The values are only populated if not given on the - init already. - - TODO: We will probably need to do the same with job duration and CPU number - """ - pgt_data = json.loads(self._physical_graph_template_data) - try: - (pgt_name, pgt) = pgt_data - except: - raise ValueError(type(pgt_data)) - nodes = list(map(lambda x:x['node'], pgt)) - islands = list(map(lambda x:x['island'], pgt)) - if self._num_islands == None: - self._num_islands = len(dict(zip(islands,nodes))) - if self._num_nodes == None: - num_nodes = list(map(lambda x,y:x+y, islands, nodes)) - self._num_nodes = len(dict(zip(num_nodes, nodes))) # uniq comb. - if (self._pip_name == None): - self._pip_name = pgt_name - return - - - @property - def num_daliuge_nodes(self): - if self._run_proxy: - ret = self._num_nodes - 1 # exclude the proxy node - else: - ret = self._num_nodes - 0 # exclude the data island node? - if ret <= 0: - raise Exception( - "Not enough nodes {0} to run DALiuGE.".format(self._num_nodes) - ) - return ret - - def get_log_dirname(self): - """ - (pipeline name_)[Nnum_of_daliuge_nodes]_[time_stamp] - """ - # Moved setting of dtstr to init to ensure it doesn't change for this instance of SlurmClient() - #dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f - graph_name = self._pip_name.split('_')[0] # use only the part of the graph name - return "{0}_{1}".format(graph_name, self._dtstr) - - def label_job_dur(self): - """ - e.g. 135 min --> 02:15:00 - """ - seconds = self._job_dur * 60 - m, s = divmod(seconds, 60) - h, m = divmod(m, 60) - return "%02d:%02d:%02d" % (h, m, s) - - def create_job_desc(self, physical_graph_file): - log_dir = "{0}/{1}".format(self._log_root, self.get_log_dirname()) - pardict = dict() - pardict["NUM_NODES"] = str(self._num_nodes) - pardict["PIP_NAME"] = self._pip_name - pardict["SESSION_ID"] = os.path.split(log_dir)[-1] - pardict["JOB_DURATION"] = self.label_job_dur() - pardict["ACCOUNT"] = self._acc - pardict["PY_BIN"] = sys.executable - pardict["LOG_DIR"] = log_dir - pardict["GRAPH_PAR"] = ( - '-L "{0}"'.format(self._logical_graph) - if self._logical_graph - else '-P "{0}"'.format(physical_graph_file) - if physical_graph_file - else "" - ) - pardict["PROXY_PAR"] = ( - "-m %s -o %d" % (self._mon_host, self._mon_port) if self._run_proxy else "" - ) - pardict["GRAPH_VIS_PAR"] = "-d" if self._visualise_graph else "" - pardict["LOGV_PAR"] = "-v %d" % self._logv - pardict["ZERORUN_PAR"] = "-z" if self._zerorun else "" - pardict["MAXTHREADS_PAR"] = "-t %d" % (self._max_threads) - pardict["SNC_PAR"] = "--app 1" if self._sleepncopy else "--app 0" - pardict["NUM_ISLANDS_PAR"] = "-s %d" % (self._num_islands) - pardict["ALL_NICS"] = "-u" if self._all_nics else "" - pardict["CHECK_WITH_SESSION"] = "-S" if self._check_with_session else "" - pardict["MODULES"] = self.modules - - job_desc = init_tpl.safe_substitute(pardict) - return job_desc - - - def submit_job(self): - log_dir = "{0}/{1}".format(self._log_root, self.get_log_dirname()) - if not os.path.exists(log_dir): - os.makedirs(log_dir) - - physical_graph_file = "{0}/{1}".format(log_dir, - self._pip_name) - with open(physical_graph_file, 'w') as pf: - pf.write(self._physical_graph_template_data) - pf.close() - - job_file = "{0}/jobsub.sh".format(log_dir) - job_desc = self.create_job_desc(physical_graph_file) - with open(job_file, "w") as jf: - jf.write(job_desc) - - with 
open(os.path.join(log_dir, "git_commit.txt"), "w") as gf: - gf.write(git_commit) - if self._submit: - os.chdir(log_dir) # so that slurm logs will be dumped here - print(subprocess.check_output(["sbatch", job_file])) - else: - print(f"Created job submission script {job_file}") - -class LogEntryPair(object): - """ """ +class LogEntryPair: + """ + Generates log entries + """ def __init__(self, name, gstart, gend): self._name = name - self._gstart = ( - gstart + 2 - ) # group 0 is the whole matching line, group 1 is the catchall + self._gstart = (gstart + 2) # group 0 is the whole matching line, group 1 is the catchall self._gend = gend + 2 self._start_time = None self._end_time = None - self._other = dict() # hack - - def get_timestamp(self, line): - """ - microsecond precision - """ - sp = line.split() - date_time = "{0}T{1}".format(sp[0], sp[1]) - pattern = "%Y-%m-%dT%H:%M:%S,%f" - epoch = time.mktime(time.strptime(date_time, pattern)) - return datetime.datetime.strptime(date_time, pattern).microsecond / 1e6 + epoch + self._other = {} def check_start(self, match, line): if self._start_time is None and match.group(self._gstart): - self._start_time = self.get_timestamp(line) + self._start_time = get_timestamp(line) def check_end(self, match, line): if self._end_time is None and match.group(self._gend): - self._end_time = self.get_timestamp(line) + self._end_time = get_timestamp(line) if self._name == "unroll": self._other["num_drops"] = int(line.split()[-1]) elif self._name == "node managers": @@ -267,7 +84,8 @@ def check_end(self, match, line): def get_duration(self): if (self._start_time is None) or (self._end_time is None): - # print "Cannot calc duration for '{0}': start_time:{1}, end_time:{2}".format(self._name, + # print "Cannot calc duration for + # '{0}': start_time:{1}, end_time:{2}".format(self._name, # self._start_time, self._end_time) return None return self._end_time - self._start_time @@ -276,8 +94,55 @@ def reset(self): self._start_time = None self._end_time = None + @property + def name(self): + return self._name + + @property + def other(self): + return self._other + + +def build_dim_log_entry_pairs(): + return [ + LogEntryPair(name, g1, g2) + for name, g1, g2 in ( + ("unroll", 0, 1), + ("translate", 2, 3), + ("gen pg spec", 3, 4), + ("create session", 5, 6), + ("separate graph", 7, 8), + ("add session to all", 9, 10), + ("deploy session to all", 11, 12), + ("build drop connections", 13, 14), + ("trigger drops", 15, 16), + ("node managers", 17, 17), + ) + ] + + +def build_nm_log_entry_pairs(): + return [ + LogEntryPair(name, g1, g2) + for name, g1, g2 in ( + ("completion_time_old", 0, 3), # Old master branch + ("completion_time", 2, 3), + ("node_deploy_time", 1, 2), + ) + ] + + +def construct_catchall_pattern(node_type): + pattern_strs = LogParser.kwords.get(node_type) + patterns = [ + x.format(".*").replace("(", r"\(").replace(")", r"\)") for x in pattern_strs + ] + catchall = "|".join(["(%s)" % (s,) for s in patterns]) + catchall = ".*(%s).*" % (catchall,) + return re.compile(catchall) + -class LogParser(object): +class LogParser: """ TODO: This needs adjustment to new log directory names!! 
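The `get_timestamp` helper hoisted to module level above converts the two leading date/time fields of a DALiuGE log line into epoch seconds with microsecond precision; `LogEntryPair` subtracts two such values to report a duration. A quick usage sketch (the log lines are made up):

```python
import datetime
import time

def get_timestamp(line):
    """Epoch seconds with microsecond precision (as in create_dlg_job.py)."""
    split = line.split()
    date_time = "{0}T{1}".format(split[0], split[1])
    pattern = "%Y-%m-%dT%H:%M:%S,%f"
    epoch = time.mktime(time.strptime(date_time, pattern))
    return datetime.datetime.strptime(date_time, pattern).microsecond / 1e6 + epoch

start = get_timestamp("2016-08-22 11:52:11,341 INFO unrolling graph")
end = get_timestamp("2016-08-22 11:52:13,586 INFO unrolled to 120 drops")
duration = end - start  # ~2.245 s, what LogEntryPair.get_duration() would report
```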
@@ -345,90 +210,54 @@ def __init__(self, log_dir): if not self.check_log_dir(log_dir): raise Exception("No DIM log found at: {0}".format(log_dir)) self._log_dir = log_dir - self._dim_catchall_pattern = self.construct_catchall_pattern(node_type="dim") - self._nm_catchall_pattern = self.construct_catchall_pattern(node_type="nm") - - def build_dim_log_entry_pairs(self): - return [ - LogEntryPair(name, g1, g2) - for name, g1, g2 in ( - ("unroll", 0, 1), - ("translate", 2, 3), - ("gen pg spec", 3, 4), - ("create session", 5, 6), - ("separate graph", 7, 8), - ("add session to all", 9, 10), - ("deploy session to all", 11, 12), - ("build drop connections", 13, 14), - ("trigger drops", 15, 16), - ("node managers", 17, 17), - ) - ] - - def build_nm_log_entry_pairs(self): - return [ - LogEntryPair(name, g1, g2) - for name, g1, g2 in ( - ("completion_time_old", 0, 3), # Old master branch - ("completion_time", 2, 3), - ("node_deploy_time", 1, 2), - ) - ] - - def construct_catchall_pattern(self, node_type): - pattern_strs = LogParser.kwords.get(node_type) - patterns = [ - x.format(".*").replace("(", r"\(").replace(")", r"\)") for x in pattern_strs - ] - catchall = "|".join(["(%s)" % (s,) for s in patterns]) - catchall = ".*(%s).*" % (catchall,) - return re.compile(catchall) + self._dim_catchall_pattern = construct_catchall_pattern(node_type="dim") + self._nm_catchall_pattern = construct_catchall_pattern(node_type="nm") def parse(self, out_csv=None): """ e.g. lofar_std_N4_2016-08-22T11-52-11 """ logb_name = os.path.basename(self._log_dir) - ss = re.search("_N[0-9]+_", logb_name) - if ss is None: + search_string = re.search("_N[0-9]+_", logb_name) + if search_string is None: raise Exception("Invalid log directory: {0}".format(self._log_dir)) - delimit = ss.group(0) - sp = logb_name.split(delimit) - pip_name = sp[0] - do_date = sp[1] + delimit = search_string.group(0) + split = logb_name.split(delimit) + pip_name = split[0] + do_date = split[1] num_nodes = int(delimit.split("_")[1][1:]) user_name = pwd.getpwuid(os.stat(self._dim_log_f[0]).st_uid).pw_name gitf = os.path.join(self._log_dir, "git_commit.txt") if os.path.exists(gitf): - with open(gitf, "r") as gf: - git_commit = gf.readline().strip() + with open(gitf, "r") as git_file: + git_commit = git_file.readline().strip() else: git_commit = "None" # parse DIM log - dim_log_pairs = self.build_dim_log_entry_pairs() + dim_log_pairs = build_dim_log_entry_pairs() for lff in self._dim_log_f: with open(lff, "r") as dimlog: for line in dimlog: - m = self._dim_catchall_pattern.match(line) - if not m: + matches = self._dim_catchall_pattern.match(line) + if not matches: continue for lep in dim_log_pairs: - lep.check_start(m, line) - lep.check_end(m, line) + lep.check_start(matches, line) + lep.check_end(matches, line) num_drops = -1 temp_dim = [] num_node_mgrs = 0 for lep in dim_log_pairs: add_dur = True - if "unroll" == lep._name: - num_drops = lep._other.get("num_drops", -1) - elif "node managers" == lep._name: - num_node_mgrs = lep._other.get("num_node_mgrs", 0) + if lep.name == "unroll": + num_drops = lep.other.get("num_drops", -1) + elif lep.name == "node managers": + num_node_mgrs = lep.other.get("num_node_mgrs", 0) add_dur = False - elif "build drop connections" == lep._name: - num_edges = lep._other.get("num_edges", -1) + elif lep.name == "build drop connections": + num_edges = lep.other.get("num_edges", -1) temp_dim.append(str(num_edges)) if add_dur: temp_dim.append(str(lep.get_duration())) @@ -439,32 +268,32 @@ def parse(self, out_csv=None): 
num_finished_sess = 0 num_dims = 0 - for df in os.listdir(self._log_dir): + for log_directory_file_name in os.listdir(self._log_dir): # Check this is a dir and contains the NM log - if not os.path.isdir(os.path.join(self._log_dir, df)): + if not os.path.isdir(os.path.join(self._log_dir, log_directory_file_name)): continue - nm_logf = os.path.join(self._log_dir, df, "dlgNM.log") - nm_dim_logf = os.path.join(self._log_dir, df, "dlgDIM.log") - nm_mm_logf = os.path.join(self._log_dir, df, "dlgMM.log") + nm_logf = os.path.join(self._log_dir, log_directory_file_name, "dlgNM.log") + nm_dim_logf = os.path.join(self._log_dir, log_directory_file_name, "dlgDIM.log") + nm_mm_logf = os.path.join(self._log_dir, log_directory_file_name, "dlgMM.log") if not os.path.exists(nm_logf): if os.path.exists(nm_dim_logf) or os.path.exists(nm_mm_logf): num_dims += 1 continue # Start anew every time - nm_log_pairs = self.build_nm_log_entry_pairs() + nm_log_pairs = build_nm_log_entry_pairs() nm_logs.append(nm_log_pairs) # Read NM log and fill all LogPair objects with open(nm_logf, "r") as nmlog: for line in nmlog: - m = self._nm_catchall_pattern.match(line) - if not m: + matches = self._nm_catchall_pattern.match(line) + if not matches: continue for lep in nm_log_pairs: - lep.check_start(m, line) - lep.check_end(m, line) + lep.check_start(matches, line) + lep.check_end(matches, line) # Looking for the deployment times and counting for finished sessions for lep in nm_log_pairs: @@ -474,9 +303,9 @@ def parse(self, out_csv=None): if dur is None: continue - if lep._name in ("completion_time", "completion_time_old"): + if lep.name in ("completion_time", "completion_time_old"): num_finished_sess += 1 - elif lep._name == "node_deploy_time": + elif lep.name == "node_deploy_time": if dur > max_node_deploy_time: max_node_deploy_time = dur @@ -505,14 +334,13 @@ def parse(self, out_csv=None): max_exec_time = 0 for log_entry_pairs in nm_logs: - indexed_leps = {lep._name: lep for lep in log_entry_pairs} + indexed_leps = {lep.name: lep for lep in log_entry_pairs} deploy_time = indexed_leps["node_deploy_time"].get_duration() if deploy_time is None: # since some node managers failed to start continue - exec_time = ( - indexed_leps["completion_time"].get_duration() - or indexed_leps["completion_time_old"].get_duration() - ) + exec_time = (indexed_leps["completion_time"].get_duration() + or indexed_leps["completion_time_old"].get_duration() + ) if exec_time is None: continue real_exec_time = exec_time - (max_node_deploy_time - deploy_time) @@ -534,9 +362,9 @@ def parse(self, out_csv=None): num_dims = num_dims if num_dims == 1 else num_dims - 1 # exclude master manager add_line = ",".join(ret + temp_dim + temp_nm + [str(int(num_dims))]) if out_csv is not None: - with open(out_csv, "a") as of: - of.write(add_line) - of.write(os.linesep) + with open(out_csv, "a") as out_file: + out_file.write(add_line) + out_file.write(os.linesep) else: print(add_line) @@ -556,8 +384,9 @@ def check_log_dir(self, log_dir): return False -if __name__ == "__main__": - parser = optparse.OptionParser(usage='\n%prog -a [1|2] -f [options]\n\n%prog -h for further help') +def main(): + parser = optparse.OptionParser( + usage='\n%prog -a [1|2] -f [options]\n\n%prog -h for further help') parser.add_option( "-a", @@ -642,7 +471,7 @@ def check_log_dir(self, log_dir): type="string", dest="mon_host", help="Monitor host IP (optional)", - default=default_aws_mon_host, + default=DEFAULT_AWS_MON_HOST, ) parser.add_option( "-o", @@ -651,7 +480,7 @@ def check_log_dir(self, 
log_dir): type="int", dest="mon_port", help="The port to bind DALiuGE monitor", - default=default_aws_mon_port, + default=DEFAULT_AWS_MON_PORT, ) parser.add_option( "-v", @@ -731,9 +560,9 @@ def check_log_dir(self, log_dir): "-f", "--facility", dest="facility", - choices=facilities, + choices=FACILITIES, action="store", - help=f"The facility for which to create a submission job\nValid options: {facilities}", + help=f"The facility for which to create a submission job\nValid options: {FACILITIES}", default=None, ) parser.add_option( @@ -744,12 +573,12 @@ def check_log_dir(self, log_dir): default=True, ) - (opts, args) = parser.parse_args(sys.argv) + (opts, _) = parser.parse_args(sys.argv) if not (opts.action and opts.facility) and not opts.configs: parser.error("Missing required parameters!") - if opts.facility not in facilities: - parser.error(f"Unknown facility provided. Please choose from {facilities}") - + if opts.facility not in FACILITIES: + parser.error(f"Unknown facility provided. Please choose from {FACILITIES}") + if opts.action == 2: if opts.log_dir is None: # you can specify: @@ -765,14 +594,14 @@ def check_log_dir(self, log_dir): ) # or a root log directory else: - for df in os.listdir(log_root): - df = os.path.join(log_root, df) - if os.path.isdir(df): + for log_dir in os.listdir(log_root): + log_dir = os.path.join(log_root, log_dir) + if os.path.isdir(log_dir): try: - log_parser = LogParser(df) + log_parser = LogParser(log_dir) log_parser.parse(out_csv=opts.csv_output) except Exception as exp: - print("Fail to parse {0}: {1}".format(df, exp)) + print("Fail to parse {0}: {1}".format(log_dir, exp)) else: log_parser = LogParser(opts.log_dir) log_parser.parse(out_csv=opts.csv_output) @@ -786,7 +615,7 @@ def check_log_dir(self, log_dir): if path_to_graph_file and not os.path.exists(path_to_graph_file): parser.error("Cannot locate graph file at '{0}'".format(path_to_graph_file)) - pc = SlurmClient( + client = SlurmClient( facility=opts.facility, job_dur=opts.job_dur, num_nodes=opts.num_nodes, @@ -801,12 +630,16 @@ def check_log_dir(self, log_dir): check_with_session=opts.check_with_session, logical_graph=opts.logical_graph, physical_graph=opts.physical_graph, - submit=True if opts.submit in ['True','true'] else False, + submit=opts.submit in ['True', 'true'], ) - pc._visualise_graph = opts.visualise_graph - pc.submit_job() - elif opts.configs == True: - print(f"Available facilities: {facilities}") + client._visualise_graph = opts.visualise_graph + client.submit_job() + elif opts.configs: + print(f"Available facilities: {FACILITIES}") else: parser.print_help() parser.error("Invalid input!") + + +if __name__ == "__main__": + main() diff --git a/daliuge-engine/dlg/deploy/deployment_constants.py b/daliuge-engine/dlg/deploy/deployment_constants.py new file mode 100644 index 000000000..c35fd91fe --- /dev/null +++ b/daliuge-engine/dlg/deploy/deployment_constants.py @@ -0,0 +1,27 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2016 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+"""
+Contains deployment constants that could be changed to easily re-configure a deployment.
+"""
+
+DEFAULT_AWS_MON_HOST = "sdp-dfms.ddns.net"  # TODO: need to change this
+DEFAULT_AWS_MON_PORT = 8898
diff --git a/daliuge-engine/dlg/deploy/slurm_utils.py b/daliuge-engine/dlg/deploy/deployment_utils.py
similarity index 69%
rename from daliuge-engine/dlg/deploy/slurm_utils.py
rename to daliuge-engine/dlg/deploy/deployment_utils.py
index d753e87ca..657c44868 100644
--- a/daliuge-engine/dlg/deploy/slurm_utils.py
+++ b/daliuge-engine/dlg/deploy/deployment_utils.py
@@ -19,6 +19,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 # MA 02111-1307 USA
 #
+import json
 
 
 class ListTokens(object):
@@ -101,3 +102,51 @@ def finish_element(sub_values, range_start):
 def list_as_string(s):
     """'a008,b[072-073,076]' --> ['a008', 'b072', 'b073', 'b076']"""
     return _parse_list_tokens(iter(_list_tokenizer(s)))
+
+
+def find_numislands(physical_graph_template_file):
+    """
+    Given the physical graph data extract the graph name and the total number of
+    nodes. We are not making a decision whether the island managers are running
+    on separate nodes here, thus the number is the sum of all island
+    managers and node managers.
+    TODO: We will probably need to do the same with job duration and CPU number
+    """
+
+    pgt_data = json.loads(physical_graph_template_file)
+    try:
+        (pgt_name, pgt) = pgt_data
+    except:
+        raise ValueError(type(pgt_data))
+    nodes = list(map(lambda x: x['node'], pgt))
+    islands = list(map(lambda x: x['island'], pgt))
+    num_islands = len(dict(zip(islands, nodes)))
+    num_nodes = list(map(lambda x, y: x + y, islands, nodes))
+    num_nodes = len(dict(zip(num_nodes, nodes)))  # unique (island, node) combinations
+    pip_name = pgt_name
+    return num_islands, num_nodes, pip_name
+
+
+def label_job_dur(job_dur):
+    """
+    e.g. 135 min --> 02:15:00
+    """
+    seconds = job_dur * 60
+    minute, sec = divmod(seconds, 60)
+    hour, minute = divmod(minute, 60)
+    return "%02d:%02d:%02d" % (hour, minute, sec)
+
+
+def num_daliuge_nodes(num_nodes: int, run_proxy: bool):
+    """
+    Returns the number of daliuge nodes available to run workflow
+    """
+    if run_proxy:
+        ret = num_nodes - 1  # exclude the proxy node
+    else:
+        ret = num_nodes - 0  # exclude the data island node?
+    if ret <= 0:
+        raise Exception(
+            "Not enough nodes {0} to run DALiuGE.".format(num_nodes)
+        )
+    return ret
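A short usage sketch of the helpers factored out into deployment_utils above; the two-node graph snippet is made up, and the `label_job_dur` example comes from its own docstring:

```python
import json
from dlg.deploy.deployment_utils import (
    find_numislands, label_job_dur, num_daliuge_nodes,
)

# 135 minutes -> "02:15:00"
assert label_job_dur(135) == "02:15:00"

# find_numislands takes the JSON text of a (name, partitioned graph) pair,
# where each drop spec carries 'node' and 'island' keys
pgt_text = json.dumps(["demo.graph", [
    {"node": "n0", "island": "i0"},
    {"node": "n1", "island": "i0"},
]])
num_islands, num_nodes, pip_name = find_numislands(pgt_text)  # 1, 2, "demo.graph"

# one node is reserved for the proxy when remote monitoring is enabled
assert num_daliuge_nodes(num_nodes=4, run_proxy=True) == 3
```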
diff --git a/daliuge-engine/dlg/deploy/helm_client.py b/daliuge-engine/dlg/deploy/helm_client.py
new file mode 100644
index 000000000..7e8b57d4a
--- /dev/null
+++ b/daliuge-engine/dlg/deploy/helm_client.py
@@ -0,0 +1,180 @@
+#
+# ICRAR - International Centre for Radio Astronomy Research
+# (c) UWA - The University of Western Australia, 2016
+# Copyright by UWA (in the framework of the ICRAR)
+# All rights reserved
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+"""
+Contains a module translating physical graphs to kubernetes helm charts.
+"""
+import json
+import re
+import time
+import os
+import sys
+import shutil
+import pathlib
+
+import dlg
+import yaml
+import subprocess
+from dlg.common.version import version as dlg_version
+from dlg.restutils import RestClient
+from dlg.deploy.common import submit
+
+
+def _write_chart(chart_dir, name: str, chart_name: str, version: str, app_version: str, home: str,
+                 description, keywords: list, sources: list, kubeVersion: str):
+    chart_info = {'apiVersion': "v2", 'name': chart_name, 'type': 'application', 'version': version,
+                  'appVersion': app_version, 'home': home, 'description': description,
+                  'keywords': keywords, 'sources': sources, 'kubeVersion': kubeVersion}
+    # TODO: Fix app_version quotations.
+    with open(f'{chart_dir}{os.sep}{name}', 'w', encoding='utf-8') as chart_file:
+        yaml.dump(chart_info, chart_file)
+
+
+def _write_values(chart_dir, config):
+    with open(f"{chart_dir}{os.sep}custom-values.yaml", 'w', encoding='utf-8') as value_file:
+        yaml.dump(config, value_file)
+
+
+def _read_values(chart_dir):
+    with open(f"{chart_dir}{os.sep}values.yaml", 'r', encoding='utf-8') as old_file:
+        data = yaml.safe_load(old_file)
+    with open(f"{chart_dir}{os.sep}custom-values.yaml", 'r', encoding='utf-8') as custom_file:
+        new_data = yaml.safe_load(custom_file)
+    data.update(new_data)
+    return data
+
+
+def _find_resources(pgt_data):
+    pgt = json.loads(pgt_data)
+    nodes = list(map(lambda x: x['node'], pgt))
+    islands = list(map(lambda x: x['island'], pgt))
+    num_islands = len(dict(zip(islands, nodes)))
+    num_nodes = len(nodes)
+    return num_islands, num_nodes
+
+
+class HelmClient:
+    """
+    Writes necessary files to launch a job with kubernetes.
+    """
+
+    def __init__(self, deploy_name, chart_name="daliuge-daemon", deploy_dir="./",
+                 submit=True, chart_version="0.1.0",
+                 value_config=None, physical_graph_file=None, chart_vars=None):
+        if value_config is None:
+            value_config = dict()
+        self._chart_name = chart_name
+        self._chart_vars = {'name': 'daliuge-daemon',
+                            'appVersion': 'v1.0.0',
+                            'home': 'https://github.com/ICRAR/daliuge/daliuge-k8s',
+                            'description': 'DALiuGE k8s deployment',
+                            'keywords': ['daliuge', 'workflow'],
+                            'sources': ['https://github.com/ICRAR/daliuge/daliuge-k8s'],
+                            'kubeVersion': ">=1.10.0-0"
+                            }
+        if chart_vars is not None:
+            self._chart_vars.update(chart_vars)
+        self._deploy_dir = deploy_dir
+        self._chart_dir = os.path.join(self._deploy_dir, 'daliuge-daemon')
+        self._chart_version = chart_version
+        self._deploy_name = deploy_name
+        self._submit = submit
+        self._value_data = value_config if value_config is not None else {}
+        self._submission_endpoint = None
+        if physical_graph_file is not None:
+            self._set_physical_graph(physical_graph_file)
+
+        # Copy in template files. 
+ library_root = pathlib.Path(os.path.dirname(dlg.__file__)).parent.parent + print(library_root) + if sys.version_info >= (3, 8): + shutil.copytree(os.path.join(library_root, 'daliuge-k8s', 'helm'), self._deploy_dir, + dirs_exist_ok=True) + else: + shutil.copytree(os.path.join(library_root, 'daliuge-k8s', 'helm'), self._deploy_dir) + + def _set_physical_graph(self, physical_graph_content): + self._physical_graph_file = physical_graph_content + self._num_islands, self._num_nodes = _find_resources( + self._physical_graph_file) + + def create_helm_chart(self, physical_graph_content): + """ + Translates a physical graph to a kubernetes helm chart. + For now, it will just try to run everything in a single container. + """ + _write_chart(self._chart_dir, 'Chart.yaml', self._chart_name, self._chart_version, + dlg_version, + self._chart_vars['home'], self._chart_vars['description'], + self._chart_vars['keywords'], self._chart_vars['sources'], + self._chart_vars['kubeVersion']) + # Update values.yaml + _write_values(self._chart_dir, self._value_data) + self._value_data = _read_values(self._chart_dir) + # Add charts + # TODO: Add charts to helm + self._set_physical_graph(physical_graph_content) + # Update template + # TODO: Update templates in helm + + def launch_helm(self): + """ + Launches the built helm chart using the most straightforward commands possible. + Assumes all files are prepared and validated. + """ + if self._submit: + os.chdir(self._deploy_dir) + instruction = f'helm install {self._deploy_name} {self._chart_name}/ ' \ + f'--values {self._chart_name}{os.sep}custom-values.yaml' + print(subprocess.check_output([instruction], + shell=True).decode('utf-8')) + query = str(subprocess.check_output(['kubectl get svc -o wide'], shell=True)) + # WARNING: May be problematic later if multiple services are running + pattern = r"-service\s*ClusterIP\s*\d+\.\d+\.\d+\.\d+" + ip_pattern = r"\d+\.\d+\.\d+\.\d+" + outcome = re.search(pattern, query) + if outcome: + manager_ip = re.search(ip_pattern, outcome.string) + self._submission_endpoint = manager_ip.group(0) + client = RestClient(self._submission_endpoint, + self._value_data['service']['daemon']['port']) + data = json.dumps({'nodes': ["127.0.0.1"]}).encode('utf-8') + time.sleep(5) # TODO: Deterministic deployment information + client._POST('/managers/island/start', content=data, + content_type='application/json') + client._POST('/managers/master/start', content=data, + content_type='application/json') + else: + print("Could not find manager IP address") + + else: + print(f"Created helm chart {self._chart_name} in {self._deploy_dir}") + + def teardown(self): + subprocess.check_output(['helm uninstall daliuge-daemon'], shell=True) + + def submit_job(self): + """ + There is a semi-dynamic element to fetching the IPs of Node(s) to deploy to. + Hence, launching the chart and initiating graph execution have been de-coupled. + """ + pg_data = json.loads(self._physical_graph_file) + submit(pg_data, self._submission_endpoint) diff --git a/daliuge-engine/dlg/deploy/remotes.py b/daliuge-engine/dlg/deploy/remotes.py index f885d30fa..38245cd57 100644 --- a/daliuge-engine/dlg/deploy/remotes.py +++ b/daliuge-engine/dlg/deploy/remotes.py @@ -27,7 +27,7 @@ import re import socket -from . import slurm_utils +from . 
import deployment_utils logger = logging.getLogger(__name__) @@ -177,7 +177,7 @@ def __init__(self, options, my_ip): self._set_world( int(os.environ["SLURM_PROCID"]), int(os.environ["SLURM_NTASKS"]), - slurm_utils.list_as_string(os.environ["SLURM_NODELIST"]), + deployment_utils.list_as_string(os.environ["SLURM_NODELIST"]), ) diff --git a/daliuge-engine/dlg/deploy/slurm_client.py b/daliuge-engine/dlg/deploy/slurm_client.py new file mode 100644 index 000000000..08f83971c --- /dev/null +++ b/daliuge-engine/dlg/deploy/slurm_client.py @@ -0,0 +1,171 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2016 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +""" +Contains a slurm client which generates slurm scripts from daliuge graphs. +""" + +import datetime +import sys +import os +import subprocess +from dlg.runtime import __git_version__ as git_commit + +from dlg.deploy.configs import ConfigFactory, init_tpl +from deployment_constants import DEFAULT_AWS_MON_PORT, DEFAULT_AWS_MON_HOST +from deployment_utils import find_numislands, label_job_dur + + +class SlurmClient: + """ + parameters we can control: + + 1. user group / account name (Required) + 2. whether to submit a graph, and if so provide graph path + 3. # of nodes (of Drop Managers) + 4. how long to run + 5. whether to produce offline graph vis + 6. whether to attach proxy for remote monitoring, and if so provide + DLG_MON_HOST + DLG_MON_PORT + 7. 
Root directory of the Log files (Required) + """ + + def __init__( + self, + log_root=None, + acc=None, + physical_graph_template_data=None, # JSON formatted physical graph template + logical_graph=None, + job_dur=30, + num_nodes=None, + run_proxy=False, + mon_host=DEFAULT_AWS_MON_HOST, + mon_port=DEFAULT_AWS_MON_PORT, + logv=1, + facility=None, + zerorun=False, + max_threads=0, + sleepncopy=False, + num_islands=None, + all_nics=False, + check_with_session=False, + submit=True, + pip_name=None, + ): + self._config = ConfigFactory.create_config(facility=facility) + self._acc = self._config.getpar("acc") if (acc is None) else acc + self._log_root = ( + self._config.getpar("log_root") if (log_root is None) else log_root + ) + self.modules = self._config.getpar("modules") + self._num_nodes = num_nodes + self._job_dur = job_dur + self._logical_graph = logical_graph + self._physical_graph_template_data = physical_graph_template_data + self._visualise_graph = False + self._run_proxy = run_proxy + self._mon_host = mon_host + self._mon_port = mon_port + self._pip_name = pip_name + self._logv = logv + self._zerorun = zerorun + self._max_threads = max_threads + self._sleepncopy = sleepncopy + self._num_islands = num_islands + self._all_nics = all_nics + self._check_with_session = check_with_session + self._submit = submit + self._dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f + self._num_islands, self._num_nodes, self._pip_name = find_numislands( + self._physical_graph_template_data) + + def get_log_dirname(self): + """ + (pipeline name_)[Nnum_of_daliuge_nodes]_[time_stamp] + """ + # Moved setting of dtstr to init + # to ensure it doesn't change for this instance of SlurmClient() + # dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f + graph_name = self._pip_name.split('_')[0] # use only the part of the graph name + return "{0}_{1}".format(graph_name, self._dtstr) + + def create_job_desc(self, physical_graph_file): + """ + Creates the slurm script from a physical graph + """ + log_dir = "{0}/{1}".format(self._log_root, self.get_log_dirname()) + pardict = dict() + pardict["NUM_NODES"] = str(self._num_nodes) + pardict["PIP_NAME"] = self._pip_name + pardict["SESSION_ID"] = os.path.split(log_dir)[-1] + pardict["JOB_DURATION"] = label_job_dur(self._job_dur) + pardict["ACCOUNT"] = self._acc + pardict["PY_BIN"] = sys.executable + pardict["LOG_DIR"] = log_dir + pardict["GRAPH_PAR"] = ( + '-L "{0}"'.format(self._logical_graph) + if self._logical_graph + else '-P "{0}"'.format(physical_graph_file) + if physical_graph_file + else "" + ) + pardict["PROXY_PAR"] = ( + "-m %s -o %d" % (self._mon_host, self._mon_port) if self._run_proxy else "" + ) + pardict["GRAPH_VIS_PAR"] = "-d" if self._visualise_graph else "" + pardict["LOGV_PAR"] = "-v %d" % self._logv + pardict["ZERORUN_PAR"] = "-z" if self._zerorun else "" + pardict["MAXTHREADS_PAR"] = "-t %d" % self._max_threads + pardict["SNC_PAR"] = "--app 1" if self._sleepncopy else "--app 0" + pardict["NUM_ISLANDS_PAR"] = "-s %d" % self._num_islands + pardict["ALL_NICS"] = "-u" if self._all_nics else "" + pardict["CHECK_WITH_SESSION"] = "-S" if self._check_with_session else "" + pardict["MODULES"] = self.modules + + job_desc = init_tpl.safe_substitute(pardict) + return job_desc + + def submit_job(self): + """ + Submits the slurm script to the cluster + """ + log_dir = "{0}/{1}".format(self._log_root, self.get_log_dirname()) + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + physical_graph_file_name = 
"{0}/{1}".format(log_dir, self._pip_name) + with open(physical_graph_file_name, 'w') as physical_graph_file: + physical_graph_file.write(self._physical_graph_template_data) + physical_graph_file.close() + + job_file_name = "{0}/jobsub.sh".format(log_dir) + job_desc = self.create_job_desc(physical_graph_file_name) + with open(job_file_name, "w") as job_file: + job_file.write(job_desc) + + with open(os.path.join(log_dir, "git_commit.txt"), "w") as git_file: + git_file.write(git_commit) + if self._submit: + os.chdir(log_dir) # so that slurm logs will be dumped here + print(subprocess.check_output(["sbatch", job_file_name])) + else: + print(f"Created job submission script {job_file_name}") diff --git a/daliuge-engine/dlg/deploy/start_dlg_cluster.py b/daliuge-engine/dlg/deploy/start_dlg_cluster.py index 193e8d30b..9ada67b0e 100644 --- a/daliuge-engine/dlg/deploy/start_dlg_cluster.py +++ b/daliuge-engine/dlg/deploy/start_dlg_cluster.py @@ -55,14 +55,13 @@ MASTER_DEFAULT_REST_PORT, ) - DIM_WAIT_TIME = 60 MM_WAIT_TIME = DIM_WAIT_TIME GRAPH_SUBMIT_WAIT_TIME = 10 GRAPH_MONITOR_INTERVAL = 5 VERBOSITY = "5" -logger = logging.getLogger("deploy.dlg.cluster") -apps = ( +LOGGER = logging.getLogger("deploy.dlg.cluster") +APPS = ( None, "test.graphsRepository.SleepApp", "test.graphsRepository.SleepAndCopyApp", @@ -81,9 +80,9 @@ def check_host(host, port, timeout=5, check_with_session=False): try: session_id = str(uuid.uuid4()) - with NodeManagerClient(host, port, timeout=timeout) as c: - c.create_session(session_id) - c.destroy_session(session_id) + with NodeManagerClient(host, port, timeout=timeout) as client: + client.create_session(session_id) + client.destroy_session(session_id) return True except: return False @@ -95,31 +94,31 @@ def check_hosts(ips, port, timeout=None, check_with_session=False, retry=1): given timeout, and returns the list of IPs that were found to be up. 
""" - def check_and_add(ip): + def check_and_add(ip_addr): ntries = retry while ntries: if check_host( - ip, port, timeout=timeout, check_with_session=check_with_session + ip_addr, port, timeout=timeout, check_with_session=check_with_session ): - logger.info("Host %s:%d is running", ip, port) - return ip - logger.warning("Failed to contact host %s:%d", ip, port) + LOGGER.info("Host %s:%d is running", ip_addr, port) + return ip_addr + LOGGER.warning("Failed to contact host %s:%d", ip_addr, port) ntries -= 1 return None # Don't return None values - tp = multiprocessing.pool.ThreadPool(min(50, len(ips))) - up = tp.map(check_and_add, ips) - tp.close() - tp.join() + thread_pool = multiprocessing.pool.ThreadPool(min(50, len(ips))) + result_pool = thread_pool.map(check_and_add, ips) + thread_pool.close() + thread_pool.join() - return [ip for ip in up if ip] + return [ip for ip in result_pool if ip] def get_ip_via_ifconfig(iface_index): out = subprocess.check_output("ifconfig") ifaces_info = list(filter(None, out.split(b"\n\n"))) - logger.info("Found %d interfaces, getting %d", len(ifaces_info), iface_index) + LOGGER.info("Found %d interfaces, getting %d", len(ifaces_info), iface_index) for line in ifaces_info[iface_index].splitlines(): line = line.strip() if line.startswith(b"inet"): @@ -130,27 +129,29 @@ def get_ip_via_ifconfig(iface_index): def get_ip_via_netifaces(iface_index): return utils.get_local_ip_addr()[iface_index][0] + def get_workspace_dir(log_dir): """ Common workspace dir for all nodes just underneath main session directory """ - return(f"{os.path.split(log_dir)[0]}/workspace") + return f"{os.path.split(log_dir)[0]}/workspace" + def start_node_mgr( - log_dir, my_ip, logv=1, max_threads=0, host=None, event_listeners="" + log_dir, my_ip, logv=1, max_threads=0, host=None, event_listeners="" ): """ Start node manager """ - logger.info("Starting node manager on host %s", my_ip) + LOGGER.info("Starting node manager on host %s", my_ip) host = host or "0.0.0.0" - lv = "v" * logv + log_level = "v" * logv args = [ "-l", log_dir, "-w", get_workspace_dir(log_dir), - "-%s" % lv, + "-%s" % log_level, "-H", host, "-m", @@ -163,25 +164,24 @@ def start_node_mgr( ] # return cmdline.dlgNM(optparse.OptionParser(), args) proc = tool.start_process("nm", args) - logger.info("Node manager process started with pid %d", proc.pid) + LOGGER.info("Node manager process started with pid %d", proc.pid) return proc - def start_dim(node_list, log_dir, origin_ip, logv=1): """ Start data island manager """ - logger.info( + LOGGER.info( "Starting island manager on host %s for node managers %r", origin_ip, node_list ) - lv = "v" * logv + log_level = "v" * logv args = [ "-l", log_dir, "-w", get_workspace_dir(log_dir), - "-%s" % lv, + "-%s" % log_level, "-N", ",".join(node_list), "-H", @@ -190,7 +190,7 @@ def start_dim(node_list, log_dir, origin_ip, logv=1): "2048", ] proc = tool.start_process("dim", args) - logger.info("Island manager process started with pid %d", proc.pid) + LOGGER.info("Island manager process started with pid %d", proc.pid) return proc @@ -200,7 +200,7 @@ def start_mm(node_list, log_dir, logv=1): node_list: a list of node address that host DIMs """ - lv = "v" * logv + log_level = "v" * logv parser = optparse.OptionParser() args = [ "-l", @@ -209,7 +209,7 @@ def start_mm(node_list, log_dir, logv=1): get_workspace_dir(log_dir), "-N", ",".join(node_list), - "-%s" % lv, + "-%s" % log_level, "-H", "0.0.0.0", "-m", @@ -222,10 +222,10 @@ def _stop(endpoints): def _the_stop(endpoint): 
common.BaseDROPManagerClient(endpoint[0], endpoint[1]).stop() - tp = multiprocessing.pool.ThreadPool(min(50, len(endpoints))) - tp.map(_the_stop, endpoints) - tp.close() - tp.join() + thread_pool = multiprocessing.pool.ThreadPool(min(50, len(endpoints))) + thread_pool.map(_the_stop, endpoints) + thread_pool.close() + thread_pool.join() def stop_nms(ips): @@ -236,16 +236,17 @@ def stop_dims(ips): _stop([(ip, ISLAND_DEFAULT_REST_PORT) for ip in ips]) -def stop_mm(ip): - _stop([(ip, MASTER_DEFAULT_REST_PORT)]) +def stop_mm(ip_addr): + _stop([(ip_addr, MASTER_DEFAULT_REST_PORT)]) -def submit_and_monitor(pg, opts, port): +def submit_and_monitor(physical_graph, opts, port): def _task(): dump_path = None if opts.dump: dump_path = os.path.join(opts.log_dir, "status-monitoring.json") - session_id = common.submit(pg, host="127.0.0.1", port=port, session_id=opts.ssid) + session_id = common.submit(physical_graph, host="127.0.0.1", port=port, + session_id=opts.ssid) while True: try: common.monitor_sessions( @@ -253,11 +254,11 @@ def _task(): ) break except: - logger.exception("Monitoring failed, restarting it") + LOGGER.exception("Monitoring failed, restarting it") - t = threading.Thread(target=_task) - t.start() - return t + threads = threading.Thread(target=_task) + threads.start() + return threads def start_proxy(dlg_host, dlg_port, monitor_host, monitor_port): @@ -271,10 +272,10 @@ def start_proxy(dlg_host, dlg_port, monitor_host, monitor_port): try: server.loop() except KeyboardInterrupt: - logger.warning("Ctrl C - Stopping DALiuGE Proxy server") + LOGGER.warning("Ctrl C - Stopping DALiuGE Proxy server") sys.exit(1) except Exception: - logger.exception("DALiuGE proxy terminated unexpectedly") + LOGGER.exception("DALiuGE proxy terminated unexpectedly") sys.exit(1) @@ -290,13 +291,13 @@ def get_pg(opts, nms, dims): """Gets the Physical Graph that is eventually submitted to the cluster, if any""" if not opts.logical_graph and not opts.physical_graph: - return + return [] num_nms = len(nms) num_dims = len(dims) if opts.logical_graph: unrolled = pg_generator.unroll( - opts.logical_graph, opts.ssid, opts.zerorun, apps[opts.app] + opts.logical_graph, opts.ssid, opts.zerorun, APPS[opts.app] ) algo_params = tool.parse_partition_algo_params(opts.algo_params) pgt = pg_generator.partition( @@ -308,8 +309,8 @@ def get_pg(opts, nms, dims): ) del unrolled # quickly dispose of potentially big object else: - with open(opts.physical_graph, "rb") as f: - pgt = json.load(f) + with open(opts.physical_graph, "rb") as pg_file: + pgt = json.load(pg_file) # modify the PG as necessary for modifier in opts.pg_modifiers.split(":"): @@ -324,13 +325,13 @@ def get_pg(opts, nms, dims): timeout=MM_WAIT_TIME, retry=3, ) - pg = pg_generator.resource_map(pgt, dims + nms, num_islands=num_dims, - co_host_dim=opts.co_host_dim) + physical_graph = pg_generator.resource_map(pgt, dims + nms, num_islands=num_dims, + co_host_dim=opts.co_host_dim) graph_name = os.path.basename(opts.log_dir) graph_name = f"{graph_name.split('_')[0]}.json" # get just the graph name - with open(os.path.join(opts.log_dir, graph_name), "wt") as f: - json.dump(pg, f) - return pg + with open(os.path.join(opts.log_dir, graph_name), "wt") as pg_file: + json.dump(physical_graph, pg_file) + return physical_graph def get_ip(opts): @@ -342,16 +343,15 @@ def get_remote(opts): my_ip = get_ip(opts) if opts.remote_mechanism == "mpi": return remotes.MPIRemote(opts, my_ip) - elif opts.remote_mechanism == "dlg": + if opts.remote_mechanism == "dlg": return 
remotes.DALiuGERemote(opts, my_ip) - elif opts.remote_mechanism == "dlg-hybrid": + if opts.remote_mechanism == "dlg-hybrid": return remotes.DALiuGEHybridRemote(opts, my_ip) else: # == 'slurm' return remotes.SlurmRemote(opts, my_ip) def main(): - parser = optparse.OptionParser() parser.add_option( "-l", @@ -568,11 +568,11 @@ def main(): try: print("From netifaces: %s" % get_ip_via_netifaces(options.interface)) except: - logger.exception("Failed to get information via netifaces") + LOGGER.exception("Failed to get information via netifaces") try: print("From ifconfig: %s" % get_ip_via_ifconfig(options.interface)) except: - logger.exception("Failed to get information via ifconfig") + LOGGER.exception("Failed to get information via ifconfig") sys.exit(0) elif options.collect_interfaces: from mpi4py import MPI @@ -587,9 +587,9 @@ def main(): parser.error( "Either a logical graph or physical graph filename must be specified" ) - for p in (options.logical_graph, options.physical_graph): - if p and not os.path.exists(p): - parser.error("Cannot locate graph file at '{0}'".format(p)) + for graph_file_name in (options.logical_graph, options.physical_graph): + if graph_file_name and not os.path.exists(graph_file_name): + parser.error("Cannot locate graph file at '{0}'".format(graph_file_name)) if options.monitor_host is not None and options.num_islands > 1: parser.error("We do not support proxy monitor multiple islands yet") @@ -602,38 +602,30 @@ def main(): log_dir = "{0}/{1}".format(options.log_dir, remote.my_ip) os.makedirs(log_dir) logfile = log_dir + "/start_dlg_cluster.log" - FORMAT = "%(asctime)-15s [%(levelname)5.5s] [%(threadName)15.15s] %(name)s#%(funcName)s:%(lineno)s %(message)s" - logging.basicConfig(filename=logfile, level=logging.DEBUG, format=FORMAT) + log_format = "%(asctime)-15s [%(levelname)5.5s] [%(threadName)15.15s] " \ + "%(name)s#%(funcName)s:%(lineno)s %(message)s" + logging.basicConfig(filename=logfile, level=logging.DEBUG, format=log_format) - logger.info("Starting DALiuGE cluster with %d nodes", remote.size) - logger.debug("Cluster nodes: %r", remote.sorted_peers) - logger.debug("Using %s as the local IP where required", remote.my_ip) + LOGGER.info("Starting DALiuGE cluster with %d nodes", remote.size) + LOGGER.debug("Cluster nodes: %r", remote.sorted_peers) + LOGGER.debug("Using %s as the local IP where required", remote.my_ip) - envfile = os.path.join(log_dir, "env.txt") - logger.debug("Dumping process' environment to %s", envfile) - with open(envfile, "wt") as f: + envfile_name = os.path.join(log_dir, "env.txt") + LOGGER.debug("Dumping process' environment to %s", envfile_name) + with open(envfile_name, "wt") as env_file: for name, value in sorted(os.environ.items()): - f.write("%s=%s\n" % (name, value)) + env_file.write("%s=%s\n" % (name, value)) logv = max(min(3, options.verbose_level), 1) if remote.is_highest_level_manager: nodesfile = os.path.join(log_dir, "nodes.txt") - logger.debug("Dumping list of nodes to %s", nodesfile) - with open(nodesfile, "wt") as f: - f.write("\n".join(remote.sorted_peers)) + LOGGER.debug("Dumping list of nodes to %s", nodesfile) + with open(nodesfile, "wt") as env_file: + env_file.write("\n".join(remote.sorted_peers)) dim_proc = None # start the NM - if remote.is_nm: - nm_proc = start_node_mgr( - log_dir, - remote.my_ip, - logv=logv, - max_threads=options.max_threads, - host=None if options.all_nics else remote.my_ip, - event_listeners=options.event_listeners, - ) if options.num_islands == 1: if remote.is_proxy: # Wait until the Island 
Manager is open @@ -645,44 +637,44 @@ def main(): options.monitor_port, ) else: - logger.warning( + LOGGER.warning( "Couldn't connect to the main drop manager, proxy not started" ) else: - logger.info(f"Starting island managers on nodes: {remote.dim_ips}") + LOGGER.info(f"Starting island managers on nodes: {remote.dim_ips}") if remote.my_ip in remote.dim_ips: dim_proc = start_dim(remote.nm_ips, log_dir, remote.my_ip, logv=logv) - pg = get_pg(options, remote.nm_ips, remote.dim_ips) + physical_graph = get_pg(options, remote.nm_ips, remote.dim_ips) monitoring_thread = submit_and_monitor( - pg, options, ISLAND_DEFAULT_REST_PORT + physical_graph, options, ISLAND_DEFAULT_REST_PORT ) monitoring_thread.join() stop_dims(remote.dim_ips) stop_nms(remote.nm_ips) if dim_proc is not None: # Stop DALiuGE. - logger.info("Stopping DALiuGE island manager on rank %d", remote.rank) + LOGGER.info("Stopping DALiuGE island manager on rank %d", remote.rank) utils.terminate_or_kill(dim_proc, 5) elif remote.is_highest_level_manager: - pg = get_pg(options, remote.nm_ips, remote.dim_ips) - remote.send_dim_nodes(pg) + physical_graph = get_pg(options, remote.nm_ips, remote.dim_ips) + remote.send_dim_nodes(physical_graph) # 7. make sure all DIMs are up running dim_ips_up = check_hosts( remote.dim_ips, ISLAND_DEFAULT_REST_PORT, timeout=MM_WAIT_TIME, retry=10 ) if len(dim_ips_up) < len(remote.dim_ips): - logger.warning( + LOGGER.warning( "Not all DIMs were up and running: %d/%d", len(dim_ips_up), len(remote.dim_ips), ) monitoring_thread = submit_and_monitor( - pg, options, MASTER_DEFAULT_REST_PORT + physical_graph, options, MASTER_DEFAULT_REST_PORT ) start_mm(remote.dim_ips, log_dir, logv=logv) monitoring_thread.join() diff --git a/daliuge-engine/dlg/deploy/start_helm_cluster.py b/daliuge-engine/dlg/deploy/start_helm_cluster.py new file mode 100644 index 000000000..24ac2577e --- /dev/null +++ b/daliuge-engine/dlg/deploy/start_helm_cluster.py @@ -0,0 +1,124 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2022 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +""" +A demo implementation of a Helm-based DAliuGE deployment. 
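+
+Example invocation (a sketch; assumes a reachable Kubernetes cluster, the
+helm binary on the PATH, and a hypothetical graph file "my_graph.graph"):
+
+    python start_helm_cluster.py -L my_graph.graph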
+
+Limitations:
+- Assumes graphs will run on a single pod
+- Does not support external graph components (yet)
+"""
+import argparse
+import json
+import os
+import tempfile
+
+from dlg.dropmake import pg_generator
+from dlg.deploy.helm_client import HelmClient
+
+
+def get_pg(opts, node_managers: list, data_island_managers: list):
+    if not opts.logical_graph and not opts.physical_graph:
+        return []
+    num_nms = len(node_managers)
+    num_dims = len(data_island_managers)
+
+    if opts.logical_graph:
+        unrolled_graph = pg_generator.unroll(opts.logical_graph)
+        pgt = pg_generator.partition(unrolled_graph, algo='metis', num_partitions=num_nms,
+                                     num_islands=num_dims)
+        del unrolled_graph
+    else:
+        # the physical graph is JSON text, so open it in text mode
+        with open(opts.physical_graph, 'r', encoding='utf-8') as pg_file:
+            pgt = json.load(pg_file)
+    physical_graph = pg_generator.resource_map(pgt, node_managers + data_island_managers)
+    # TODO: Add dumping to log-dir
+    return physical_graph
+
+
+def start_helm(physical_graph_template, num_nodes: int, deploy_dir: str):
+    # TODO: Dynamic helm chart logging dir
+    # TODO: Multiple node deployments
+    available_ips = ["127.0.0.1"]
+    pgt = json.loads(physical_graph_template)
+    pgt = pg_generator.partition(pgt, algo='metis', num_partitions=len(available_ips),
+                                 num_islands=len(available_ips))
+    pg = pg_generator.resource_map(pgt, available_ips + available_ips)
+    helm_client = HelmClient(
+        deploy_name='daliuge-daemon',
+        chart_name='daliuge-daemon',
+        deploy_dir=deploy_dir
+    )
+    helm_client.create_helm_chart(json.dumps(pg))
+    helm_client.launch_helm()
+    helm_client.submit_job()
+    helm_client.teardown()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-L',
+        '--logical-graph',
+        action="store",
+        type=str,
+        dest="logical_graph",
+        help="The filename of the logical graph to deploy",
+        default=None
+    )
+    parser.add_argument(
+        "-P",
+        "--physical-graph",
+        action="store",
+        type=str,
+        dest="physical_graph",
+        help="The filename of the physical graph (template) to deploy",
+        default=None,
+    )
+
+    options = parser.parse_args()
+    if bool(options.logical_graph) == bool(options.physical_graph):
+        parser.error(
+            "Either a logical graph or physical graph filename must be specified"
+        )
+    for graph_file_name in (options.logical_graph, options.physical_graph):
+        if graph_file_name and not os.path.exists(graph_file_name):
+            parser.error(f"Cannot locate graph file at {graph_file_name}")
+
+    available_ips = ["127.0.0.1"]
+    physical_graph = get_pg(options, available_ips, available_ips)
+
+    helm_client = HelmClient(
+        deploy_name='daliuge-daemon',
+        chart_name='daliuge-daemon',
+        deploy_dir='/home/nicholas/dlg_temp/demo'
+    )
+    helm_client.create_helm_chart(json.dumps(physical_graph))
+    helm_client.launch_helm()
+    helm_client.submit_job()
+    helm_client.teardown()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/daliuge-engine/dlg/drop.py b/daliuge-engine/dlg/drop.py
index b804a6b4f..495e246b9 100644
--- a/daliuge-engine/dlg/drop.py
+++ b/daliuge-engine/dlg/drop.py
@@ -76,9 +76,7 @@
 if sys.version_info >= (3, 8):
     from .io import SharedMemoryIO
-from .utils import prepare_sql, createDirIfMissing, isabs, object_tracking
-from .meta import dlg_float_param, dlg_int_param, dlg_list_param, \
-    dlg_string_param, dlg_bool_param, dlg_dict_param
+from .utils import prepare_sql, createDirIfMissing, isabs, object_tracking, getDlgVariable
 from dlg.process import DlgProcess
 from .meta import (
     dlg_float_param,
@@ -223,6 +221,10 @@ def __init__(self, oid, uid, **kwargs):
         self._producers_uids = set()
         self._producers = ListAsDict(self._producers_uids)
 
+        # Matchers used to validate environment variable syntax
+        self._env_var_matcher = re.compile(r"\$[A-Za-z\d]+\..+")
+        self._dlg_var_matcher = re.compile(r"\$DLG_.+")
+
         # Set holding the state of the producers that have finished their
         # execution. Once all producers have finished, this DROP moves
         # itself to the COMPLETED state
@@ -421,6 +423,328 @@ def initialize(self, **kwargs):
         method implementation, which is usually the case).
         """
 
+    def incrRefCount(self):
+        """
+        Increments the reference count of this DROP by one atomically.
+        """
+        with self._refLock:
+            self._refCount += 1
+
+    def decrRefCount(self):
+        """
+        Decrements the reference count of this DROP by one atomically.
+        """
+        with self._refLock:
+            self._refCount -= 1
+
+    @track_current_drop
+    def open(self, **kwargs):
+        """
+        Opens the DROP for reading, and returns a "DROP descriptor"
+        that must be used when invoking the read() and close() methods.
+        DROPs maintain an internal reference count based on the number
+        of times they are opened for reading; because of that, after a
+        successful call to this method the corresponding close() method
+        must eventually be invoked. Failing to do so will result in
+        DROPs never expiring and never getting deleted.
+        """
+        if self.status != DROPStates.COMPLETED:
+            raise Exception(
+                "%r is in state %s (!=COMPLETED), cannot be opened for reading"
+                % (
+                    self,
+                    self.status,
+                )
+            )
+
+        io = self.getIO()
+        logger.debug("Opening drop %s", self.oid)
+        io.open(OpenMode.OPEN_READ, **kwargs)
+
+        # Save the IO object in the dictionary and return its descriptor instead
+        while True:
+            descriptor = random.SystemRandom().randint(-(2 ** 31), 2 ** 31 - 1)
+            if descriptor not in self._rios:
+                break
+        self._rios[descriptor] = io
+
+        # This occurs only after a successful opening
+        self.incrRefCount()
+        self._fire("open")
+
+        return descriptor
+
+    @track_current_drop
+    def close(self, descriptor, **kwargs):
+        """
+        Closes the given DROP descriptor, decreasing the DROP's
+        internal reference count and releasing the underlying resources
+        associated to the descriptor.
+        """
+        self._checkStateAndDescriptor(descriptor)
+
+        # Decrement counter and then actually close
+        self.decrRefCount()
+        io = self._rios.pop(descriptor)
+        io.close(**kwargs)
+
+    def _closeWriters(self):
+        """
+        Close our writing IO instance.
+        If written externally, self._wio will have remained None
+        """
+        if self._wio:
+            try:
+                self._wio.close()
+            except:
+                # make sure that a previous issue does not cause the graph to hang!
+                pass
+                # raise Exception("Problem closing file!")
+            self._wio = None
+
+    def read(self, descriptor, count=4096, **kwargs):
+        """
+        Reads `count` bytes from the given DROP `descriptor`.
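+
+        A minimal usage sketch (`drop` is a hypothetical data drop in the
+        COMPLETED state):
+
+            descriptor = drop.open()
+            try:
+                chunk = drop.read(descriptor, count=4096)
+            finally:
+                drop.close(descriptor)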
+ """ + self._checkStateAndDescriptor(descriptor) + io = self._rios[descriptor] + return io.read(count, **kwargs) + + def _checkStateAndDescriptor(self, descriptor): + if self.status != DROPStates.COMPLETED: + raise Exception( + "%r is in state %s (!=COMPLETED), cannot be read" + % ( + self, + self.status, + ) + ) + if descriptor is None: + raise ValueError("Illegal empty descriptor given") + if descriptor not in self._rios: + raise Exception( + "Illegal descriptor %d given, remember to open() first" % (descriptor) + ) + + def isBeingRead(self): + """ + Returns `True` if the DROP is currently being read; `False` + otherwise + """ + with self._refLock: + return self._refCount > 0 + + @track_current_drop + def write(self, data: Union[bytes, memoryview], **kwargs): + """ + Writes the given `data` into this DROP. This method is only meant + to be called while the DROP is in INITIALIZED or WRITING state; + once the DROP is COMPLETE or beyond only reading is allowed. + The underlying storage mechanism is responsible for implementing the + final writing logic via the `self.writeMeta()` method. + """ + + if self.status not in [DROPStates.INITIALIZED, DROPStates.WRITING]: + raise Exception("No more writing expected") + + if not isinstance(data, (bytes, memoryview)): + raise Exception("Data type not of binary type: %s", type(data).__name__) + + # We lazily initialize our writing IO instance because the data of this + # DROP might not be written through this DROP + if not self._wio: + self._wio = self.getIO() + try: + self._wio.open(OpenMode.OPEN_WRITE) + except: + self.status = DROPStates.ERROR + raise Exception("Problem opening drop for write!") + nbytes = self._wio.write(data) + + dataLen = len(data) + if nbytes != dataLen: + # TODO: Maybe this should be an actual error? + logger.warning( + "Not all data was correctly written by %s (%d/%d bytes written)" + % (self, nbytes, dataLen) + ) + + # see __init__ for the initialization to None + if self._size is None: + self._size = 0 + self._size += nbytes + + # Trigger our streaming consumers + if self._streamingConsumers: + for streamingConsumer in self._streamingConsumers: + streamingConsumer.dataWritten(self.uid, data) + + # Update our internal checksum + if not checksum_disabled: + self._updateChecksum(data) + + # If we know how much data we'll receive, keep track of it and + # automatically switch to COMPLETED + if self._expectedSize > 0: + remaining = self._expectedSize - self._size + if remaining > 0: + self.status = DROPStates.WRITING + else: + if remaining < 0: + logger.warning( + "Received and wrote more bytes than expected: " + + str(-remaining) + ) + logger.debug( + "Automatically moving %r to COMPLETED, all expected data arrived" + % (self,) + ) + self.setCompleted() + else: + self.status = DROPStates.WRITING + + return nbytes + + def autofill_environment_variables(self): + """ + Runs through all parameters here, fetching those which match the env-var syntax when + discovered. 
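+
+        A sketch with hypothetical values: a parameter set to
+        "$env_store.user_name" is resolved by querying a producer drop
+        named "env_store" for "user_name", while "$DLG_ROOT" is resolved
+        from the DALiuGE environment:
+
+            drop.parameters = {"user": "$env_store.user_name",
+                               "root": "$DLG_ROOT"}
+            drop.autofill_environment_variables()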
+ """ + for param_key, param_val in self.parameters.items(): + if self._env_var_matcher.fullmatch(str(param_val)): + self.parameters[param_key] = self.get_environment_variable(param_val) + if self._dlg_var_matcher.fullmatch(str(param_val)): + self.parameters[param_key] = getDlgVariable(param_val) + + def get_environment_variable(self, key: str): + """ + Expects keys of the form $store_name.var_name + $store_name.var_name.sub_var_name will query store_name for var_name.sub_var_name + """ + if self._dlg_var_matcher.fullmatch(key): + return getDlgVariable(key) + if len(key) < 2 or key[0] != '$': + # Reject malformed entries + return None + key_edit = key[1:] + env_var_ref, env_var_key = key_edit.split('.')[0], '.'.join(key_edit.split('.')[1:]) + env_var_drop = None + for producer in self.producers: + if producer.name == env_var_ref: + env_var_drop = producer + if env_var_drop is not None: # TODO: Check for KeyValueDROP interface support + return env_var_drop.get(env_var_key) + else: + return None + + def get_environment_variables(self, keys: list): + """ + Expects multiple instances of the single key form + """ + return_values = [] + for key in keys: + # TODO: Accumulate calls to the same env_var_store to save communication + return_values.append(self.get_environment_variable(key)) + return return_values + + @abstractmethod + def getIO(self) -> DataIO: + """ + Returns an instance of one of the `dlg.io.DataIO` instances that + handles the data contents of this DROP. + """ + + def delete(self): + """ + Deletes the data represented by this DROP. + """ + self.getIO().delete() + + def exists(self): + """ + Returns `True` if the data represented by this DROP exists indeed + in the underlying storage mechanism + """ + return self.getIO().exists() + + @abstractmethod + def dataURL(self): + """ + A URL that points to the data referenced by this DROP. Different + DROP implementations will use different URI schemes. + """ + + def _updateChecksum(self, chunk): + # see __init__ for the initialization to None + if self._checksum is None: + self._checksum = 0 + self._checksumType = _checksumType + self._checksum = crc32c(chunk, self._checksum) + + @property + def checksum(self): + """ + The checksum value for the data represented by this DROP. Its + value is automatically calculated if the data was actually written + through this DROP (using the `self.write()` method directly or + indirectly). In the case that the data has been externally written, the + checksum can be set externally after the DROP has been moved to + COMPLETED or beyond. + + :see: `self.checksumType` + """ + if self.status == DROPStates.COMPLETED and self._checksum is None: + # Generate on the fly + io = self.getIO() + io.open(OpenMode.OPEN_READ) + data = io.read(4096) + while data is not None and len(data) > 0: + self._updateChecksum(data) + data = io.read(4096) + io.close() + return self._checksum + + @checksum.setter + def checksum(self, value): + if self._checksum is not None: + raise Exception( + "The checksum for DROP %s is already calculated, cannot overwrite with new value" + % (self) + ) + if self.status in [DROPStates.INITIALIZED, DROPStates.WRITING]: + raise Exception( + "DROP %s is still not fully written, cannot manually set a checksum yet" + % (self) + ) + self._checksum = value + + @property + def checksumType(self): + """ + The algorithm used to compute this DROP's data checksum. Its value + if automatically set if the data was actually written through this + DROP (using the `self.write()` method directly or indirectly). 
In + the case that the data has been externally written, the checksum type + can be set externally after the DROP has been moved to COMPLETED + or beyond. + + :see: `self.checksum` + """ + return self._checksumType + + @checksumType.setter + def checksumType(self, value): + if self._checksumType is not None: + raise Exception( + "The checksum type for DROP %s is already set, cannot overwrite with new value" + % (self) + ) + if self.status in [DROPStates.INITIALIZED, DROPStates.WRITING]: + raise Exception( + "DROP %s is still not fully written, cannot manually set a checksum type yet" + % (self) + ) + self._checksumType = value + @property def oid(self): """ @@ -1193,7 +1517,8 @@ def dataURL(self) -> str: # @brief File # @details A standard file on a filesystem mounted to the deployment machine # @par EAGLE_START -# @par category File +# @param category File +# @param tag template # @param[in] param/data_volume Data volume/5/Float/readwrite/ # \~English Estimated size of the data contained in this node # @param[in] param/group_end Group end/False/Boolean/readwrite/ @@ -1369,7 +1694,8 @@ def dataURL(self): # @brief NGAS # @details An archive on the Next Generation Archive System (NGAS). # @par EAGLE_START -# @par category File +# @param category NGAS +# @param tag template # @param[in] param/data_volume Data volume/5/Float/readwrite/ # \~English Estimated size of the data contained in this node # @param[in] param/group_end Group end/False/Boolean/readwrite/ @@ -1489,7 +1815,8 @@ def dataURL(self): # @brief Memory # @details In-memory storage of intermediate data products # @par EAGLE_START -# @par category Memory +# @param category Memory +# @param tag template # @param[in] param/data_volume Data volume/5/Float/readwrite/ # \~English Estimated size of the data contained in this node # @param[in] param/group_end Group end/False/Boolean/readwrite/ @@ -2330,7 +2657,17 @@ def initialize(self, **kwargs): kwargs["n_effective_inputs"] = -1 super(BarrierAppDROP, self).initialize(**kwargs) - +## +# @brief Branch +# @details A conditional branch to control flow +# @par EAGLE_START +# @param category Branch +# @param tag template +# @param[in] param/appclass Application Class/dlg.apps.simple.SimpleBranch/String/readonly/ +# \~English Application class +# @param[in] param/group_start Group start/False/Boolean/readwrite/ +# \~English Is this node the start of a group? +# @par EAGLE_END class BranchAppDrop(BarrierAppDROP): """ A special kind of application with exactly two outputs. After normal @@ -2351,6 +2688,168 @@ def execute(self, _send_notifications=True): self._notifyAppIsFinished() +## +# @brief Plasma +# @details An object in a Apache Arrow Plasma in-memory object store +# @par EAGLE_START +# @param category Plasma +# @param tag template +# @param[in] param/data_volume Data volume/5/Float/readwrite/ +# \~English Estimated size of the data contained in this node +# @param[in] param/group_end Group end/False/Boolean/readwrite/ +# \~English Is this node the end of a group? 
+# @param[in] param/plasma_path Plasma Path//String/readwrite/ +# \~English Path to the local plasma store +# @param[in] param/object_id Object Id//String/readwrite/ +# \~English PlasmaId of the object for all compute nodes +# @param[in] param/use_staging Use Staging/False/Boolean/readwrite/ +# \~English Enables writing to a dynamically resizeable staging buffer +# @par EAGLE_END +class PlasmaDROP(AbstractDROP): + """ + A DROP that points to data stored in a Plasma Store + """ + + plasma_path = dlg_string_param("plasma_path", "/tmp/plasma") + object_id = dlg_string_param("object_id", None) + use_staging = dlg_bool_param("use_staging", False) + + def initialize(self, **kwargs): + object_id = self.uid + if len(self.uid) != 20: + object_id = np.random.bytes(20) + if not self.object_id: + self.object_id = object_id + + def getIO(self): + return PlasmaIO(plasma.ObjectID(self.object_id), + self.plasma_path, + expected_size=self._expectedSize, + use_staging=self.use_staging) + + @property + def dataURL(self): + return "plasma://%s" % (binascii.hexlify(self.object_id).decode("ascii")) + + +## +# @brief PlasmaFlight +# @details An Apache Arrow Flight server providing distributed access +# to a Plasma in-memory object store +# @par EAGLE_START +# @param category PlasmaFlight +# @param tag template +# @param[in] param/data_volume Data volume/5/Float/readwrite/ +# \~English Estimated size of the data contained in this node +# @param[in] param/group_end Group end/False/Boolean/readwrite/ +# \~English Is this node the end of a group? +# @param[in] param/plasma_path Plasma Path//String/readwrite/ +# \~English Path to the local plasma store +# @param[in] param/object_id Object Id//String/readwrite/ +# \~English PlasmaId of the object for all compute nodes +# @param[in] param/flight_path Flight Path//String/readwrite/ +# \~English IP and flight port of the drop owner +# @par EAGLE_END +class PlasmaFlightDROP(AbstractDROP): + """ + A DROP that points to data stored in a Plasma Store + """ + + object_id = dlg_string_param("object_id", None) + plasma_path = dlg_string_param("plasma_path", "/tmp/plasma") + flight_path = dlg_string_param("flight_path", None) + + def initialize(self, **kwargs): + object_id = self.uid + if len(self.uid) != 20: + object_id = np.random.bytes(20) + if self.object_id is None: + self.object_id = object_id + + def getIO(self): + if isinstance(self.object_id, str): + object_id = plasma.ObjectID(self.object_id.encode("ascii")) + elif isinstance(self.object_id, bytes): + object_id = plasma.ObjectID(self.object_id) + else: + raise Exception( + "Invalid argument " + + str(self.object_id) + + " expected str, got" + + str(type(self.object_id)) + ) + return PlasmaFlightIO( + object_id, + self.plasma_path, + flight_path=self.flight_path, + size=self._expectedSize, + ) + + @property + def dataURL(self): + return "plasmaflight://%s" % (binascii.hexlify(self.object_id).decode("ascii")) + +## +# @brief ParameterSet +# @details A set of parameters, wholly specified in EAGLE +# @par EAGLE_START +# @param category ParameterSet +# @param tag template +# @param[in] param/mode Parset mode/"YANDA"/String/readonly/False/To what standard DALiuGE should filter and serialize the parameters. 
+# @param[in] param/config_data ConfigData/""/String/readwrite/False/Additional configuration information to be mixed in with the initial data +# @param[out] port/Config ConfigFile/File/The output configuration file +# @par EAGLE_END +class ParameterSetDROP(AbstractDROP): + """ + A generic configuration file template wrapper + This drop opens an (optional) file containing some initial configuration information, then + appends any additional specified parameters to it, finally serving it as a data object. + """ + + config_data = b'' + + mode = dlg_string_param('mode', None) + + @abstractmethod + def serialize_parameters(self, parameters: dict, mode): + """ + Returns a string representing a serialization of the parameters. + """ + if mode == "YANDA": + # TODO: Add more complex value checking + return "\n".join(f"{x}={y}" for x, y in parameters.items()) + # Add more formats (.ini for example) + return "\n".join(f"{x}={y}" for x, y in parameters.items()) + + @abstractmethod + def filter_parameters(self, parameters: dict, mode): + """ + Returns a dictionary of parameters, with daliuge-internal or other parameters filtered out + """ + if mode == 'YANDA': + forbidden_params = list(DEFAULT_INTERNAL_PARAMETERS) + if parameters['config_data'] == "": + forbidden_params.append('configData') + return {key: val for key, val in parameters.items() if + key not in DEFAULT_INTERNAL_PARAMETERS} + return parameters + + def initialize(self, **kwargs): + """ + TODO: Open input file + """ + self.config_data = self.serialize_parameters( + self.filter_parameters(self.parameters, self.mode), self.mode).encode('utf-8') + + def getIO(self): + return MemoryIO(io.BytesIO(self.config_data)) + + @property + def dataURL(self): + hostname = os.uname()[1] + return f"config://{hostname}/{os.getpid()}/{id(self.config_data)}" + + # Dictionary mapping 1-to-many DROPLinkType constants to the corresponding methods # used to append a a DROP into a relationship collection of another # (e.g., one uses `addConsumer` to add a DROPLinkeType.CONSUMER DROP into diff --git a/daliuge-engine/dlg/droputils.py b/daliuge-engine/dlg/droputils.py index 8cfbf91c6..d8f9bf6af 100644 --- a/daliuge-engine/dlg/droputils.py +++ b/daliuge-engine/dlg/droputils.py @@ -446,6 +446,30 @@ def replace_dataurl_placeholders(cmd, inputs, outputs): return cmd +def serialize_applicationArgs(applicationArgs, prefix='--', separator=' '): + """ + Unpacks the applicationArgs dictionary and returns a string + that can be used as command line parameters. + """ + if not isinstance(applicationArgs, dict): + logger.info("applicationArgs are not passed as a dict. 
Ignored!")
+        return ''
+
+    # construct the actual command line from all application parameters
+    args = []
+
+    for (name, value) in applicationArgs.items():
+        if value in [None, False, ""]:
+            continue
+        elif value is True:
+            value = ''
+        # short and long version of keywords
+        if prefix == "--" and len(name) == 1:
+            arg = [f'-{name} {value}']
+        else:
+            # strip to remove unnecessary blanks
+            arg = [f'{prefix}{name}{separator}{value}'.strip()]
+        args += arg
+
+    return ' '.join(args)
+
 
 # Easing the transition from single- to multi-package
 get_leaves = common.get_leaves
diff --git a/daliuge-engine/dlg/environmentvar_drop.py b/daliuge-engine/dlg/environmentvar_drop.py
new file mode 100644
index 000000000..da58e593c
--- /dev/null
+++ b/daliuge-engine/dlg/environmentvar_drop.py
@@ -0,0 +1,100 @@
+#
+# ICRAR - International Centre for Radio Astronomy Research
+# (c) UWA - The University of Western Australia, 2014
+# Copyright by UWA (in the framework of the ICRAR)
+# All rights reserved
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+import abc
+import io
+import os
+import json
+
+from dlg.drop import AbstractDROP, DEFAULT_INTERNAL_PARAMETERS
+from dlg.io import MemoryIO
+
+
+class KeyValueDROP:
+
+    @abc.abstractmethod
+    def get(self, key):
+        """
+        Returns the value stored by this drop for the given key. Returns None if not present.
+        """
+
+    @abc.abstractmethod
+    def get_multiple(self, keys: list):
+        """
+        Returns a list of values stored by this drop. Maintains order, returning None for any
+        keys not present.
+        """
+
+    # TODO: Implement set(key, value) operations
+    @abc.abstractmethod
+    def set(self, key, value):
+        """
+        Should update a value in the key-value store, or add a new value if not present.
+        """
+        # Will be difficult to handle shared-memory considerations.
+        # For such a job, using Redis or another distributed key-value store may be
+        # more appropriate.
+
+
+def _filter_parameters(parameters: dict):
+    return {key: val for key, val in parameters.items() if
+            key not in DEFAULT_INTERNAL_PARAMETERS}
+
+
+##
+# @brief Environment variables
+# @details A set of environment variables, wholly specified in EAGLE and accessible to all drops.
+# @par EAGLE_START
+# @param category EnvironmentVars
+# @par EAGLE_END
+class EnvironmentVarDROP(AbstractDROP, KeyValueDROP):
+    """
+    Drop storing static variables for access by all drops.
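+    For example (hypothetical variable), a value "user_name" defined in
+    EAGLE can be read by any drop via env_drop.get("user_name").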
+ Functions effectively like a globally-available Python dictionary + """ + + def initialize(self, **kwargs): + """ + Runs through all parameters, putting each into this drop's variable dict + """ + super(EnvironmentVarDROP, self).initialize(**kwargs) + self._variables = dict() + self._variables.update(_filter_parameters(self.parameters)) + + def getIO(self): + return MemoryIO(io.BytesIO(json.dumps(self._variables).encode('utf-8'))) + + def get(self, key): + return self._variables.get(key) + + def get_multiple(self, keys: list): + return_vars = [] + for key in keys: + return_vars.append(self._variables.get(key)) + return return_vars + + def set(self, key, value): + raise NotImplementedError( + 'Setting EnvironmentVariables mid-execution is not currently implemented') + + @property + def dataURL(self): + hostname = os.uname()[1] + return f"config://{hostname}/{os.getpid()}/{id(self._variables)}" diff --git a/daliuge-engine/dlg/graph_loader.py b/daliuge-engine/dlg/graph_loader.py index 48cd193f3..1929d9de2 100644 --- a/daliuge-engine/dlg/graph_loader.py +++ b/daliuge-engine/dlg/graph_loader.py @@ -42,9 +42,9 @@ NullDROP, EndDROP, PlasmaDROP, - PlasmaFlightDROP, - ParameterSetDROP + PlasmaFlightDROP ) +from dlg.parset_drop import ParameterSetDROP from .exceptions import InvalidGraphException from .json_drop import JsonDROP from .common import Categories, DropType diff --git a/daliuge-engine/dlg/parset_drop.py b/daliuge-engine/dlg/parset_drop.py new file mode 100644 index 000000000..53b5ea03e --- /dev/null +++ b/daliuge-engine/dlg/parset_drop.py @@ -0,0 +1,88 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2014 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +import io +import os +from abc import abstractmethod + +from dlg.drop import AbstractDROP, DEFAULT_INTERNAL_PARAMETERS +from dlg.io import MemoryIO +from dlg.meta import dlg_string_param + + +## +# @brief ParameterSet +# @details A set of parameters, wholly specified in EAGLE +# @par EAGLE_START +# @param category ParameterSet +# @param[in] param/mode Parset mode/"YANDA"/String/readonly/False/To what standard DALiuGE should filter and serialize the parameters. +# @param[in] param/config_data ConfigData/""/String/readwrite/False/Additional configuration information to be mixed in with the initial data +# @param[out] port/Config ConfigFile/File/The output configuration file +# @par EAGLE_END +class ParameterSetDROP(AbstractDROP): + """ + A generic configuration file template wrapper + This drop opens an (optional) file containing some initial configuration information, then + appends any additional specified parameters to it, finally serving it as a data object. 
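+
+    A sketch of the serialized YANDA-mode output for two hypothetical
+    parameters, "freq_start" and "freq_end":
+
+        freq_start=1.4e9
+        freq_end=1.8e9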
+ """ + + config_data = b'' + + mode = dlg_string_param('mode', None) + + @abstractmethod + def serialize_parameters(self, parameters: dict, mode): + """ + Returns a string representing a serialization of the parameters. + """ + if mode == "YANDA": + # TODO: Add more complex value checking + return "\n".join(f"{x}={y}" for x, y in parameters.items()) + # Add more formats (.ini for example) + return "\n".join(f"{x}={y}" for x, y in parameters.items()) + + @abstractmethod + def filter_parameters(self, parameters: dict, mode): + """ + Returns a dictionary of parameters, with daliuge-internal or other parameters filtered out + """ + if mode == 'YANDA': + forbidden_params = list(DEFAULT_INTERNAL_PARAMETERS) + if parameters['config_data'] == "": + forbidden_params.append('configData') + return {key: val for key, val in parameters.items() if + key not in DEFAULT_INTERNAL_PARAMETERS} + return parameters + + def initialize(self, **kwargs): + """ + TODO: Open input file + """ + self.config_data = self.serialize_parameters( + self.filter_parameters(self.parameters, self.mode), self.mode).encode('utf-8') + + def getIO(self): + return MemoryIO(io.BytesIO(self.config_data)) + + @property + def dataURL(self): + hostname = os.uname()[1] + return f"config://{hostname}/{os.getpid()}/{id(self.config_data)}" diff --git a/daliuge-engine/dlg/s3_drop.py b/daliuge-engine/dlg/s3_drop.py index ea6529c26..8ecba7535 100644 --- a/daliuge-engine/dlg/s3_drop.py +++ b/daliuge-engine/dlg/s3_drop.py @@ -34,7 +34,8 @@ # @brief S3 # @details A 'bucket' object available on Amazon's Simple Storage Service (S3) # @par EAGLE_START -# @par category S3 +# @param category S3 +# @param tag template # @param[in] param/data_volume Data volume/5/Float/readwrite/ # \~English Estimated size of the data contained in this node # @param[in] param/group_end Group end/False/Boolean/readwrite/ diff --git a/daliuge-engine/dlg/utils.py b/daliuge-engine/dlg/utils.py index 780330159..81fa16975 100644 --- a/daliuge-engine/dlg/utils.py +++ b/daliuge-engine/dlg/utils.py @@ -40,7 +40,6 @@ from . import common - logger = logging.getLogger(__name__) @@ -218,6 +217,16 @@ def getDlgPath(): return os.path.join(getDlgDir(), "code") +def getDlgVariable(key: str): + """ + Queries environment for variables assumed to start with 'DLG_'. + Special case for DLG_ROOT, since this is easily identifiable. 
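+
+    A usage sketch ("DLG_MONITOR_PORT" is a hypothetical variable assumed
+    to be set in the environment):
+
+        getDlgVariable("$DLG_ROOT")          # the DALiuGE root directory
+        getDlgVariable("$DLG_MONITOR_PORT")  # os.environ.get("DLG_MONITOR_PORT")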
+ """ + if key == "$DLG_ROOT": + return getDlgDir() + return os.environ.get(key[1:]) + + def createDirIfMissing(path): """ Creates the given directory if it doesn't exist diff --git a/daliuge-engine/docker/Dockerfile b/daliuge-engine/docker/Dockerfile index 5b619bfc0..87f42cd55 100644 --- a/daliuge-engine/docker/Dockerfile +++ b/daliuge-engine/docker/Dockerfile @@ -22,6 +22,6 @@ EXPOSE 8002 # enable the virtualenv path from daliuge-common ENV VIRTUAL_ENV=/home/ray/dlg ENV PATH="$VIRTUAL_ENV/bin:$PATH" -ENV DLG_ROOT="/tmp/dlg/var/dlg_home" +ENV DLG_ROOT="/var/dlg_home" CMD ["dlg", "daemon", "-vv"] \ No newline at end of file diff --git a/daliuge-engine/docker/Dockerfile.dev b/daliuge-engine/docker/Dockerfile.dev index 4ad67b797..5f44af744 100644 --- a/daliuge-engine/docker/Dockerfile.dev +++ b/daliuge-engine/docker/Dockerfile.dev @@ -5,11 +5,8 @@ FROM icrar/daliuge-common:${VCS_TAG:-latest} # RUN sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata \ # gcc g++ gdb casacore-dev clang-tidy-10 clang-tidy libboost1.71-all-dev libgsl-dev -RUN apt-get update &&\ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gcc python3-pip curl - COPY / /daliuge -RUN . /root/dlg/bin/activate && pip install wheel && cd /daliuge && \ +RUN . /dlg/bin/activate && pip install wheel && cd /daliuge && \ pip install . EXPOSE 9000 @@ -20,8 +17,8 @@ EXPOSE 8001 EXPOSE 8002 # enable the virtualenv path from daliuge-common -ENV VIRTUAL_ENV=/root/dlg +ENV VIRTUAL_ENV=/dlg ENV PATH="$VIRTUAL_ENV/bin:$PATH" -ENV DLG_ROOT="/tmp/dlg/var/dlg_home" +ENV DLG_ROOT="/tmp/dlg" CMD ["dlg", "daemon", "-vv"] \ No newline at end of file diff --git a/daliuge-engine/docker/group.template b/daliuge-engine/docker/group.template new file mode 100644 index 000000000..135ac16a3 --- /dev/null +++ b/daliuge-engine/docker/group.template @@ -0,0 +1,42 @@ +root:x:0: +daemon:x:1: +bin:x:2: +sys:x:3: +adm:x:4: +tty:x:5: +disk:x:6: +lp:x:7: +mail:x:8: +news:x:9: +uucp:x:10: +man:x:12: +proxy:x:13: +kmem:x:15: +dialout:x:20: +fax:x:21: +voice:x:22: +cdrom:x:24: +floppy:x:25: +tape:x:26: +sudo:x:27: +audio:x:29: +dip:x:30: +www-data:x:33: +backup:x:34: +operator:x:37: +list:x:38: +irc:x:39: +src:x:40: +gnats:x:41: +shadow:x:42: +utmp:x:43: +video:x:44: +sasl:x:45: +plugdev:x:46: +staff:x:50: +games:x:60: +users:x:100: +nogroup:x:65534: +crontab:x:101: +messagebus:x:102: +ssh:x:103: diff --git a/daliuge-engine/docker/passwd.template b/daliuge-engine/docker/passwd.template new file mode 100644 index 000000000..c9043069a --- /dev/null +++ b/daliuge-engine/docker/passwd.template @@ -0,0 +1,20 @@ +root:x:0:0:root:/root:/bin/bash +daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin +bin:x:2:2:bin:/bin:/usr/sbin/nologin +sys:x:3:3:sys:/dev:/usr/sbin/nologin +sync:x:4:65534:sync:/bin:/bin/sync +games:x:5:60:games:/usr/games:/usr/sbin/nologin +man:x:6:12:man:/var/cache/man:/usr/sbin/nologin +lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin +mail:x:8:8:mail:/var/mail:/usr/sbin/nologin +news:x:9:9:news:/var/spool/news:/usr/sbin/nologin +uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin +proxy:x:13:13:proxy:/bin:/usr/sbin/nologin +www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin +backup:x:34:34:backup:/var/backups:/usr/sbin/nologin +list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin +irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin +gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin +nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin 
+_apt:x:100:65534::/nonexistent:/usr/sbin/nologin +messagebus:x:101:102::/nonexistent:/usr/sbin/nologin diff --git a/daliuge-engine/docker/prepare_user.py b/daliuge-engine/docker/prepare_user.py new file mode 100644 index 000000000..72cb8e674 --- /dev/null +++ b/daliuge-engine/docker/prepare_user.py @@ -0,0 +1,48 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2014 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +""" +Script to generate passwd and group files for docker containers to mount. +This will make sure that the apps in the containers are running as the +current user and thus the generated files have the correct owner. + +Inspired by Stimela code +""" +import pwd, grp, os + +workdir = f"{os.environ['DLG_ROOT']}/workspace/settings" +try: + os.mkdir(workdir) +except FileExistsError: + pass +except: + raise +template_dir = os.path.join(os.path.dirname(__file__), ".") +# get current user info +pw = pwd.getpwuid(os.getuid()) +gr = grp.getgrgid(pw.pw_gid) +with open(os.path.join(workdir, "passwd"), "wt") as file: + file.write(open(os.path.join(template_dir, "passwd.template"), "rt").read()) + file.write(f"{pw.pw_name}:x:{pw.pw_uid}:{pw.pw_gid}:{pw.pw_gecos}:/:/bin/bash") +with open(os.path.join(workdir, "group"), "wt") as file: + file.write(open(os.path.join(template_dir, "group.template"), "rt").read()) + file.write(f"{gr.gr_name}:x:{gr.gr_gid}:") + diff --git a/daliuge-engine/lalo/A b/daliuge-engine/lalo/A new file mode 100644 index 000000000..dd6fe23c7 --- /dev/null +++ b/daliuge-engine/lalo/A @@ -0,0 +1 @@ +xĚD \ No newline at end of file diff --git a/daliuge-engine/lalo/C b/daliuge-engine/lalo/C new file mode 100644 index 000000000..dd6fe23c7 --- /dev/null +++ b/daliuge-engine/lalo/C @@ -0,0 +1 @@ +xĚD \ No newline at end of file diff --git a/daliuge-engine/pip/requirements.txt b/daliuge-engine/pip/requirements.txt index b696a713a..6dd9ae859 100644 --- a/daliuge-engine/pip/requirements.txt +++ b/daliuge-engine/pip/requirements.txt @@ -17,5 +17,6 @@ python-daemon pyzmq scp twine +pyyaml # 0.6 brings python3 support plus other fixes zerorpc >= 0.6 diff --git a/daliuge-engine/run_engine.sh b/daliuge-engine/run_engine.sh index bc1b0f555..8841971cc 100755 --- a/daliuge-engine/run_engine.sh +++ b/daliuge-engine/run_engine.sh @@ -6,7 +6,20 @@ DOCKER_OPTS="\ -p 5555:5555 -p 6666:6666 \ -p 8000:8000 -p 8001:8001 \ -p 8002:8002 -p 9000:9000 \ +--user $(id -u):$(id -g) \ " +common_prep () +{ + mkdir -p ${DLG_ROOT}/workspace + mkdir -p ${DLG_ROOT}/testdata + mkdir -p ${DLG_ROOT}/code + # get current user and group id and prepare passwd and group files + python docker/prepare_user.py + DOCKER_OPTS=${DOCKER_OPTS}" -v ${DLG_ROOT}/workspace/settings/passwd:/etc/passwd" + 
DOCKER_OPTS=${DOCKER_OPTS}" -v ${DLG_ROOT}/workspace/settings/group:/etc/group"
+    DOCKER_OPTS=${DOCKER_OPTS}" -v ${PWD}/dlg/manager:/dlg/lib/python3.8/site-packages/dlg/manager"
+    DOCKER_OPTS=${DOCKER_OPTS}" -v ${DLG_ROOT}:${DLG_ROOT} --env DLG_ROOT=${DLG_ROOT}"
+}
 
 case "$1" in
     "dep")
@@ -17,21 +30,17 @@ case "$1" in
             echo "Please either create and grant access to $USER or build and run the development version."
         else
             VCS_TAG=`git describe --tags --abbrev=0|sed s/v//`
-            DOCKER_OPTS=${DOCKER_OPTS}"-v ${DLG_ROOT}:${DLG_ROOT} --env DLG_ROOT=${DLG_ROOT} "
+            common_prep
            echo "Running Engine deployment version in background..."
            echo "docker run -td "${DOCKER_OPTS}" icrar/daliuge-engine:${VCS_TAG}"
            docker run -td ${DOCKER_OPTS} icrar/daliuge-engine:${VCS_TAG}
            exit 0
        fi;;
    "dev")
-       DLG_ROOT="/tmp/dlg"
+       export DLG_ROOT="/tmp/dlg"
        export VCS_TAG=`git rev-parse --abbrev-ref HEAD | tr '[:upper:]' '[:lower:]'`
+       common_prep
        echo "Running Engine development version in background..."
-       mkdir -p ${DLG_ROOT}/workspace
-       mkdir -p ${DLG_ROOT}/testdata
-       mkdir -p ${DLG_ROOT}/code
-       DOCKER_OPTS=${DOCKER_OPTS}"-v ${PWD}/dlg/manager:/root/dlg/lib/python3.8/site-packages/dlg/manager"
-       DOCKER_OPTS=${DOCKER_OPTS}" -v ${DLG_ROOT}:${DLG_ROOT} --env DLG_ROOT=${DLG_ROOT}"
        echo "docker run -td ${DOCKER_OPTS} icrar/daliuge-engine:${VCS_TAG}"
        docker run -td ${DOCKER_OPTS} icrar/daliuge-engine:${VCS_TAG}
        sleep 3
@@ -41,11 +50,7 @@ case "$1" in
        DLG_ROOT="/tmp/dlg"
        export VCS_TAG=`git rev-parse --abbrev-ref HEAD | tr '[:upper:]' '[:lower:]'`
        echo "Running Engine development version in background..."
-       mkdir -p ${DLG_ROOT}/workspace
-       mkdir -p ${DLG_ROOT}/testdata
-       mkdir -p ${DLG_ROOT}/code
-       DOCKER_OPTS=${DOCKER_OPTS}"-v ${PWD}/dlg/manager:/root/dlg/lib/python3.8/site-packages/dlg/manager"
-       DOCKER_OPTS=${DOCKER_OPTS}" -v ${DLG_ROOT}:${DLG_ROOT} --env DLG_ROOT=${DLG_ROOT}"
+       common_prep
        CONTAINER_NM="icrar/daliuge-engine:${VCS_TAG}-casa"
        echo "docker run -td ${DOCKER_OPTS} ${CONTAINER_NM}"
        docker run -td ${DOCKER_OPTS} ${CONTAINER_NM}
diff --git a/daliuge-engine/setup.py b/daliuge-engine/setup.py
index 297cf1d62..10bbf8bee 100644
--- a/daliuge-engine/setup.py
+++ b/daliuge-engine/setup.py
@@ -133,6 +133,7 @@ def run(self):
         "python-daemon",
         "pyzmq",
         "scp",
+        "pyyaml",
         # 0.19.0 requires netifaces < 0.10.5, exactly the opposite of what *we* need
         "zeroconf >= 0.19.1",
         # 0.6 brings python3 support plus other fixes
diff --git a/daliuge-engine/test/apps/test_bash.py b/daliuge-engine/test/apps/test_bash.py
index 7e202c6b9..c23d75b11 100644
--- a/daliuge-engine/test/apps/test_bash.py
+++ b/daliuge-engine/test/apps/test_bash.py
@@ -83,7 +83,7 @@ def assert_message_is_correct(message, command):
         assert_message_is_correct(msg, 'echo -n "{0}" > %o0'.format(msg))
         msg = 'This is a message with a double quotes: "'
         assert_message_is_correct(msg, "echo -n '{0}' > %o0".format(msg))
-
+
     def test_envvars(self):
         """Checks that the DLG_* environment variables are available to bash programs"""
diff --git a/daliuge-engine/test/deploy/test_helm_client.py b/daliuge-engine/test/deploy/test_helm_client.py
new file mode 100644
index 000000000..f28bb3ac2
--- /dev/null
+++ b/daliuge-engine/test/deploy/test_helm_client.py
@@ -0,0 +1,113 @@
+#
+# ICRAR - International Centre for Radio Astronomy Research
+# (c) UWA - The University of Western Australia, 2019
+# Copyright by UWA (in the framework of the ICRAR)
+# All rights reserved
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General
Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +""" +Module tests the helm chart translation and deployment functionality. +""" +import unittest +import tempfile +import os +import sys +import yaml +import json + +from dlg.common.version import version as dlg_version +from dlg.deploy.helm_client import HelmClient +from dlg.common import Categories + + +@unittest.skipIf(sys.version_info <= (3, 8), "Copyign temp files fail on Python < 3.7") +class TestHelmClient(unittest.TestCase): + + def test_create_default_helm_chart(self): + with tempfile.TemporaryDirectory() as tmp_dir: + helm_client = HelmClient(deploy_dir=tmp_dir, deploy_name='my_fun_name') + helm_client.create_helm_chart('[]') + chart_file_name = os.path.join(helm_client._chart_dir, "Chart.yaml") + with open(chart_file_name, 'r', encoding='utf-8') as chart_file: + chart_data = yaml.safe_load(chart_file) + self.assertEqual(helm_client._chart_name, chart_data['name']) + self.assertEqual(dlg_version, chart_data['appVersion']) + + def test_custom_ports(self): + pass + + def test_create_single_node_helm_chart(self): + pg = [ + {"oid": "A", "type": "plain", "storage": Categories.MEMORY}, + { + "oid": "B", + "type": "app", + "app": "dlg.apps.simple.SleepApp", + "inputs": ["A"], + "outputs": ["C"], + }, + {"oid": "C", "type": "plain", "storage": Categories.MEMORY}, + ] + for drop in pg: + drop["node"] = "127.0.0.1" + drop["island"] = "127.0.0.1" + with tempfile.TemporaryDirectory() as tmp_dir: + helm_client = HelmClient(deploy_dir=tmp_dir, deploy_name='dlg-test') + helm_client.create_helm_chart(json.dumps(pg)) + self.assertEqual(pg, json.loads(helm_client._physical_graph_file)) + self.assertEqual(1, helm_client._num_islands) + self.assertEqual(3, helm_client._num_nodes) + + @unittest.skip + def test_create_multi_node_helm_chart(self): + pg = [ + {"oid": "A", "type": "plain", "storage": Categories.MEMORY, "node": "127.0.0.1", + "island": "127.0.0.1"}, + { + "oid": "B", + "type": "app", + "app": "dlg.apps.simple.SleepApp", + "inputs": ["A"], + "outputs": ["C"], + "node": "127.0.0.1", + "island": "127.0.0.1" + }, + { + "oid": "D", + "type": "app", + "app": "dlg.apps.simple.SleepApp", + "inputs": ["A"], + "outputs": ["E"], + "node": "127.0.0.2", + "island": "127.0.0.2" + }, + {"oid": "C", "type": "plain", "storage": Categories.MEMORY, "node": "127.0.0.1", + "island": "127.0.0.1"}, + {"oid": "E", "type": "plain", "storage": Categories.MEMORY, "node": "127.0.0.2", + "island": "127.0.0.2"} + ] + with tempfile.TemporaryDirectory() as tmp_dir: + helm_client = HelmClient(deploy_dir=tmp_dir, deploy_name='dlg_test') + helm_client.create_helm_chart(pg) + # TODO: Assert translation works + self.assertEqual(2, helm_client._num_islands) + self.assertEqual(5, helm_client._num_nodes) + self.fail("Test not yet implemented") + + @unittest.skip + def test_submit_job(self): + self.fail("Test not yet implemented") diff --git a/daliuge-engine/test/deploy/test_slurm_utils.py 
b/daliuge-engine/test/deploy/test_slurm_utils.py index a1faf6e32..eb75eed15 100644 --- a/daliuge-engine/test/deploy/test_slurm_utils.py +++ b/daliuge-engine/test/deploy/test_slurm_utils.py @@ -22,12 +22,12 @@ import unittest -from dlg.deploy import slurm_utils +from dlg.deploy import deployment_utils class TestSlurmUtils(unittest.TestCase): def assert_list_as_string(self, s, expected_list): - slurm_list = slurm_utils.list_as_string(s) + slurm_list = deployment_utils.list_as_string(s) self.assertEqual(expected_list, slurm_list) def test_list_as_string(self): diff --git a/daliuge-engine/test/graphs/ddTest.graph b/daliuge-engine/test/graphs/ddTest.graph new file mode 100644 index 000000000..a4cf68549 --- /dev/null +++ b/daliuge-engine/test/graphs/ddTest.graph @@ -0,0 +1,89 @@ +[ + { + "oid": "DD", + "type": "app", + "app": "dlg.apps.bash_shell_app.BashShellApp", + "rank": [ + 0 + ], + "loop_cxt": null, + "tw": 5, + "execution_time": 5, + "num_cpus": 1, + "group_start": false, + "command": "dd", + "input_redirection": "", + "output_redirection": "", + "command_line_arguments": "", + "paramValueSeparator": "=", + "argumentPrefix": "", + "applicationArgs": { + "if": "%i0", + "of": "%o0", + "count": 10, + "bs": 1024 + }, + "iid": "0", + "lg_key": -2, + "dt": "BashShellApp", + "nm": "dd", + "inputs": [ + "A" + ], + "outputs": [ + "C" + ], + "node": "localhost", + "island": "localhost" + }, + { + "oid": "A", + "type": "plain", + "storage": "File", + "rank": [ + 0 + ], + "loop_cxt": null, + "dw": 5, + "check_filepath_exists": false, + "data_volume": 5, + "group_end": false, + "filepath": "", + "dirname": "", + "applicationArgs": {}, + "iid": "0", + "lg_key": -3, + "dt": "File", + "nm": "Input File", + "consumers": [ + "DD" + ], + "node": "localhost", + "island": "localhost" + }, + { + "oid": "C", + "type": "plain", + "storage": "File", + "rank": [ + 0 + ], + "loop_cxt": null, + "dw": 5, + "check_filepath_exists": true, + "data_volume": 5, + "group_end": false, + "filepath": "", + "dirname": "", + "applicationArgs": {}, + "iid": "0", + "lg_key": -4, + "dt": "File", + "nm": "Output File", + "producers": [ + "DD" + ], + "node": "localhost", + "island": "localhost" + } +] \ No newline at end of file diff --git a/daliuge-engine/test/graphs/test_graphExecution.py b/daliuge-engine/test/graphs/test_graphExecution.py new file mode 100644 index 000000000..ff32ec046 --- /dev/null +++ b/daliuge-engine/test/graphs/test_graphExecution.py @@ -0,0 +1,87 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2015 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+import json
+import os
+import unittest
+
+from asyncio.log import logger
+import pkg_resources
+
+from dlg import runtime
+from dlg import droputils
+from dlg import utils
+from dlg.ddap_protocol import DROPStates
+from dlg.manager.composite_manager import DataIslandManager
+from dlg.testutils import ManagerStarter
+
+hostname = "localhost"
+
+
+class LocalDimStarter(ManagerStarter):
+    def setUp(self):
+        super(LocalDimStarter, self).setUp()
+        self.nm_info = self.start_nm_in_thread()
+        self.dm = self.nm_info.manager
+        self.dim = DataIslandManager([hostname])
+
+    def tearDown(self):
+        self.nm_info.stop()
+        self.dim.shutdown()
+        super(LocalDimStarter, self).tearDown()
+
+
+class TestGraphs(LocalDimStarter, unittest.TestCase):
+    """
+    Class to test the execution of actual physical graphs,
+    rather than python constructions. Add additional graphs
+    and associated tests as required.
+    """
+
+    def createSessionAndAddGraph(self, sessionId, graphSpec="", sleepTime=0):
+        self.dim.createSession(sessionId)
+        self.assertEqual(0, self.dim.getGraphSize(sessionId))
+        self.dim.addGraphSpec(sessionId, graphSpec)
+        self.assertEqual(len(graphSpec), self.dim.getGraphSize(sessionId))
+
+    def test_ddGraph(self):
+        """
+        Graph is using dd to read a file and write to another. This is mainly
+        to test that the paramValueSeparator parameter is working correctly.
+        """
+        sessionId = "lalo"
+        ddGraph = "graphs/ddTest.graph"
+        with pkg_resources.resource_stream(
+                "test", ddGraph) as f:  # @UndefinedVariable
+            logger.debug(f'Loading graph: {f}')
+            graphSpec = json.load(f)
+        self.createSessionAndAddGraph(sessionId, graphSpec=graphSpec)
+
+        # Deploy now and get A and C
+        self.dim.deploySession(sessionId)
+        a, c = [self.dm._sessions[sessionId].drops[x] for x in ("A", "C")]
+
+        data = os.urandom(10)
+        with droputils.DROPWaiterCtx(self, c, 3):
+            a.write(data)
+            a.setCompleted()
+
+        self.assertEqual(data, droputils.allDropContents(c))
diff --git a/daliuge-engine/test/manager/test_dim.py b/daliuge-engine/test/manager/test_dim.py
index a7d3e3f6c..ee70259f7 100644
--- a/daliuge-engine/test/manager/test_dim.py
+++ b/daliuge-engine/test/manager/test_dim.py
@@ -19,6 +19,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 # MA 02111-1307 USA
 #
+from asyncio.log import logger
 import codecs
 import json
 import os
@@ -295,6 +296,7 @@ def test_fullRound(self):
             "test", "graphs/complex.js"
         ) as f:  # @UndefinedVariable
             complexGraphSpec = json.load(codecs.getreader("utf-8")(f))
+            logger.debug(f'Loaded graph: {f}')
         for dropSpec in complexGraphSpec:
             dropSpec["node"] = hostname
         testutils.post(
diff --git a/daliuge-engine/test/test_ParameterSetDROP.py b/daliuge-engine/test/test_ParameterSetDROP.py
index 8e9c9176f..4213d6393 100644
--- a/daliuge-engine/test/test_ParameterSetDROP.py
+++ b/daliuge-engine/test/test_ParameterSetDROP.py
@@ -20,7 +20,7 @@
 # MA 02111-1307 USA
 #
 import unittest
-from dlg.drop import ParameterSetDROP
+from dlg.parset_drop import ParameterSetDROP
 from dlg.droputils import allDropContents
diff --git a/daliuge-engine/test/test_environmentvars.py b/daliuge-engine/test/test_environmentvars.py
new file mode 100644
index 000000000..51afd94b9
--- /dev/null
+++ b/daliuge-engine/test/test_environmentvars.py
@@ -0,0 +1,174 @@
+#
+# ICRAR - International Centre for Radio Astronomy Research
+# (c) UWA - The
University of Western Australia, 2014 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +import unittest +from dlg.environmentvar_drop import EnvironmentVarDROP +from dlg.drop import AbstractDROP +from dlg.utils import getDlgDir + + +def create_std_env_vars(name='env_vars'): + return EnvironmentVarDROP(oid='a', uid='a', nm=name, dir_var='/HOME/', int_var=3, + bool_var=False, + float_var=0.5, dict_var={'first': 1, 'second': 'sec'}, + list_var=[1, 2.0, '3']) + + +def create_empty_env_vars(name='env_vars'): + return EnvironmentVarDROP(oid='b', uid='b', nm=name) + + +class TestEnvironmentVarDROP(unittest.TestCase): + + def test_get(self): + """ + Tests that environment variables are read in and fetched correctly. + """ + env_drop = create_std_env_vars() + self.assertEqual('/HOME/', env_drop.get('dir_var')) + self.assertEqual(3, env_drop.get('int_var')) + self.assertEqual(False, env_drop.get('bool_var')) + self.assertEqual(0.5, env_drop.get('float_var')) + self.assertEqual({'first': 1, 'second': 'sec'}, env_drop.get('dict_var')) + self.assertEqual([1, 2.0, '3'], env_drop.get('list_var')) + self.assertIsNone(env_drop.get('non_var')) + self.assertIsNone(env_drop.get('uid')) + + def test_get_empty(self): + """ + Tests that an empty environment drop contains no environment variables. 
+ """ + env_drop = create_empty_env_vars() + self.assertEqual(dict(), env_drop._variables) + + def test_get_multiple(self): + """ + Tests the get_multiple routine for environment variables is correct + """ + env_drop = create_std_env_vars() + expected_vars = [None, '/HOME/', 3, False, 0.5, {'first': 1, 'second': 'sec'}, + [1, 2.0, '3'], None] + query_keys = ['uid', 'dir_var', 'int_var', 'bool_var', 'float_var', 'dict_var', 'list_var', + 'non_var'] + self.assertEqual(expected_vars, env_drop.get_multiple(query_keys)) + + def test_set(self): + """ + Should currently raise un-implemented, but here for completeness + """ + env_drop = create_std_env_vars() + self.assertRaises(NotImplementedError, env_drop.set, 'var', 'val') + + def test_drop_get_single(self): + """ + Tests the AbstractDROP fetch routine functions correctly with a single environment drop + """ + env_drop = create_std_env_vars() + test_drop = AbstractDROP(uid='b', oid='b') + test_drop.addProducer(env_drop) + self.assertEqual('/HOME/', test_drop.get_environment_variable('$env_vars.dir_var')) + self.assertEqual(3, test_drop.get_environment_variable('$env_vars.int_var')) + self.assertEqual(False, test_drop.get_environment_variable('$env_vars.bool_var')) + self.assertEqual(0.5, test_drop.get_environment_variable('$env_vars.float_var')) + self.assertEqual({'first': 1, 'second': 'sec'}, + test_drop.get_environment_variable('$env_vars.dict_var')) + self.assertEqual([1, 2.0, '3'], test_drop.get_environment_variable('$env_vars.list_var')) + self.assertIsNone(test_drop.get_environment_variable('$env_vars.non_var')) + self.assertIsNone(test_drop.get_environment_variable('$env_vars.uid')) + + def test_drop_get_multiple(self): + """ + Tests the AbstractDROP multiple fetch routine functions correctly with a single environment + drop + """ + env_name = 'env_vars' + env_drop = create_std_env_vars(name=env_name) + test_drop = AbstractDROP(uid='b', oid='b') + test_drop.addProducer(env_drop) + expected_vars = [None, '/HOME/', 3, False, 0.5, {'first': 1, 'second': 'sec'}, + [1, 2.0, '3'], None] + query_keys = ['uid', 'dir_var', 'int_var', 'bool_var', 'float_var', 'dict_var', 'list_var', + 'non_var'] + query_keys = [f'${env_name}.{x}' for x in query_keys] # Build queries of the correct form + # Add some purposefully malformed vars + query_keys.extend(['dir_var', '$non_store.non_var']) + expected_vars.extend([None, None]) + self.assertEqual(expected_vars, test_drop.get_environment_variables(query_keys)) + + def test_drop_get_empty(self): + """ + Tests the case where the environment drop has no name + """ + env_name = '' + env_drop = create_empty_env_vars(name=env_name) + test_drop = AbstractDROP(uid='c', oid='c') + test_drop.addProducer(env_drop) + self.assertEqual(None, test_drop.get_environment_variable('')) + self.assertEqual(None, test_drop.get_environment_variable('$')) + + def test_drop_get_multiEnv(self): + """ + Tests the AbstractDROP fetch routine with multiple environment drops + """ + env1_name = 'env_vars' + env2_name = 'more_vars' + env1_drop = create_std_env_vars(name=env1_name) + env2_drop = EnvironmentVarDROP(oid='d', uid='d', nm=env2_name, dir_var='/DIFFERENT/', + int_var=4) + test_drop = AbstractDROP(uid='c', oid='c') + test_drop.addProducer(env1_drop) + test_drop.addProducer(env2_drop) + self.assertEqual('/HOME/', test_drop.get_environment_variable(f"${env1_name}.dir_var")) + self.assertEqual('/DIFFERENT/', + test_drop.get_environment_variable(f"${env2_name}.dir_var")) + self.assertEqual(3, 
test_drop.get_environment_variable(f"${env1_name}.int_var")) + self.assertEqual(4, test_drop.get_environment_variable(f"${env2_name}.int_var")) + self.assertIsNone(test_drop.get_environment_variable(f'{env1_name}.int_var')) + self.assertIsNone(test_drop.get_environment_variable(f'.int_var')) + self.assertIsNone(test_drop.get_environment_variable(f'$third_env.int_var')) + self.assertEqual(['/HOME/', '/DIFFERENT/', 3, 4, None, None], + test_drop.get_environment_variables( + [f'${env1_name}.dir_var', f'${env2_name}.dir_var', + f'${env1_name}.int_var', f'${env2_name}.int_var', + f'${env1_name}.non_var', '$fake.var'] + )) + + def test_autofill_environment_vars(self): + """ + Tests the autofilling functionality of AbstractDROP + """ + env_drop = create_std_env_vars(name='env_vars') + test_drop = AbstractDROP(oid='a', uid='a', dir_var='$env_vars.dir_var', + int_var='$env_vars.int_var', non_var=set()) + test_drop.addProducer(env_drop) + test_drop.autofill_environment_variables() + self.assertEqual('/HOME/', test_drop.parameters['dir_var']) + self.assertEqual(3, test_drop.parameters['int_var']) + + def test_get_dlg_vars(self): + test_drop = AbstractDROP(oid='a', uid='a', dlg_root='$DLG_ROOT', + non_dlg_var='$DLG_NONEXISTS', non_var=set()) + test_drop.autofill_environment_variables() + self.assertEqual(getDlgDir(), test_drop.parameters['dlg_root']) + self.assertEqual(getDlgDir(), test_drop.get_environment_variable('$DLG_ROOT')) + self.assertEqual(None, test_drop.parameters['non_dlg_var']) + self.assertEqual(None, test_drop.get_environment_variable('$DLG_NONEXISTS')) diff --git a/daliuge-k8s/helm/README.md b/daliuge-k8s/helm/README.md new file mode 100644 index 000000000..f7a6bbb27 --- /dev/null +++ b/daliuge-k8s/helm/README.md @@ -0,0 +1,21 @@ +NOTE: there are two deployment versions, one for minikube and one for a cluster, copy the correct file into templates. + +The DALiuGE root directory needs to be visible inside the cluster. In the case of minikube this means you may need to run + + minikube mount /dlg:/dlg + +Finally, on minikube you may need to run the follwoing + + minikube tunnel --cleanup + +NOTE: On MacOS you can run with --clenaup and will start it and cleanup after. Not sure if this is the dame for all platforms. + +NOTE: Using --values my-values will overwrite any values specified in the values.yaml file. + +# Install/Setup +From mychart directory + +helm install daliuge-daemon . 
+kubectl get svc -o wide
+curl -d '{"nodes": ["localhost"]}' -H "Content-Type: application/json" -X POST http://<EXTERNAL-IP>:9000/managers/island/start
+helm uninstall daliuge-daemon
diff --git a/daliuge-k8s/helm/daliuge-daemon-depl-store-cluster.yaml b/daliuge-k8s/helm/daliuge-daemon-depl-store-cluster.yaml
new file mode 100644
index 000000000..c740a98ae
--- /dev/null
+++ b/daliuge-k8s/helm/daliuge-daemon-depl-store-cluster.yaml
@@ -0,0 +1,34 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: daliuge-daemon-deployment
+  labels:
+    app: daliuge-daemon
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: daliuge-daemon
+  template:
+    metadata:
+      labels:
+        app: daliuge-daemon
+    spec:
+      volumes:
+        - name: daliuge-pv-storage
+          persistentVolumeClaim:
+            claimName: daliuge-pv-claim
+      containers:
+        - name: daliuge-daemon
+          image: icrar/daliuge-engine:master
+          ports:
+            - containerPort: 9000
+          volumeMounts:
+            - mountPath: "/dlg"
+              name: daliuge-pv-storage
+          env:
+            - name: DLG_ROOT
+              valueFrom:
+                configMapKeyRef:
+                  name: daliuge-daemon-configmap
+                  key: dlg_root
diff --git a/daliuge-k8s/helm/daliuge-daemon-pv.yaml b/daliuge-k8s/helm/daliuge-daemon-pv.yaml
new file mode 100644
index 000000000..de62f1ac5
--- /dev/null
+++ b/daliuge-k8s/helm/daliuge-daemon-pv.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: daliuge-pv-volume
+  labels:
+    type: local
+spec:
+  storageClassName: manual
+  capacity:
+    storage: 10Gi
+  accessModes:
+    - ReadWriteOnce
+  hostPath:
+    path: "/dlg"
diff --git a/daliuge-k8s/helm/daliuge-daemon-pvc.yaml b/daliuge-k8s/helm/daliuge-daemon-pvc.yaml
new file mode 100644
index 000000000..4fc8ed932
--- /dev/null
+++ b/daliuge-k8s/helm/daliuge-daemon-pvc.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: daliuge-pv-claim
+spec:
+  storageClassName: manual
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 5Gi
diff --git a/daliuge-k8s/helm/daliuge-daemon/.helmignore b/daliuge-k8s/helm/daliuge-daemon/.helmignore
new file mode 100644
index 000000000..e69de29bb
diff --git a/daliuge-k8s/helm/daliuge-daemon/Chart.yaml b/daliuge-k8s/helm/daliuge-daemon/Chart.yaml
new file mode 100644
index 000000000..2bfd71a42
--- /dev/null
+++ b/daliuge-k8s/helm/daliuge-daemon/Chart.yaml
@@ -0,0 +1,13 @@
+apiVersion: v2
+appVersion: 2.0.1
+description: DALiuGE k8s deployment
+home: https://github.com/ICRAR/daliuge/daliuge-k8s
+keywords:
+- daliuge
+- workflow
+kubeVersion: '>=1.10.0-0'
+name: daliuge-daemon
+sources:
+- https://github.com/ICRAR/daliuge/daliuge-k8s
+type: application
+version: 0.1.0
diff --git a/daliuge-k8s/helm/daliuge-daemon/my-values.yaml b/daliuge-k8s/helm/daliuge-daemon/my-values.yaml
new file mode 100644
index 000000000..e69de29bb
diff --git a/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-configmap.yaml b/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-configmap.yaml
new file mode 100644
index 000000000..43ab6312b
--- /dev/null
+++ b/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-configmap.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: daliuge-daemon-configmap
+data:
+  dlg_root: {{ .Values.dlg_root_in_container }}
diff --git a/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-depl-store-minikube.yaml b/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-depl-store-minikube.yaml
new file mode 100644
index 000000000..83dbd2770
--- /dev/null
+++
b/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-depl-store-minikube.yaml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: daliuge-daemon-deployment + labels: + app: daliuge-daemon +spec: + replicas: 1 + selector: + matchLabels: + app: daliuge-daemon + template: + metadata: + labels: + app: daliuge-daemon + spec: + volumes: + - name: dlg-mount + hostPath: + path: {{ .Values.dlg_root_on_cluster_nodes }} + containers: + - name: daliuge-daemon + image: {{ .Values.containers.name }} + ports: + - containerPort: {{ .Values.containers.ports.containerPort }} + volumeMounts: + - mountPath: {{ .Values.dlg_root_in_container }} + name: dlg-mount + env: + - name: DLG_ROOT + valueFrom: + configMapKeyRef: + name: daliuge-daemon-configmap + key: dlg_root diff --git a/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-service.yaml b/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-service.yaml new file mode 100644 index 000000000..074d98812 --- /dev/null +++ b/daliuge-k8s/helm/daliuge-daemon/templates/daliuge-daemon-service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + name: daliuge-daemon-service +spec: + selector: + app: daliuge-daemon + ports: + - protocol: TCP + name: {{ .Values.service.daemon.name }} + port: {{ .Values.service.daemon.port }} + targetPort: {{ .Values.containers.ports.containerPort }} + - protocol: TCP + name: {{ .Values.service.deployment.name }} + port: {{ .Values.service.deployment.port }} + targetPort: {{ .Values.containers.ports.deploymentPort }} + - protocol: TCP + name: {{ .Values.service.nodemgr.name }} + port: {{ .Values.service.nodemgr.port }} + targetPort: {{ .Values.containers.ports.nodemanagerPort }} \ No newline at end of file diff --git a/daliuge-k8s/helm/daliuge-daemon/values.yaml b/daliuge-k8s/helm/daliuge-daemon/values.yaml new file mode 100644 index 000000000..e51a922be --- /dev/null +++ b/daliuge-k8s/helm/daliuge-daemon/values.yaml @@ -0,0 +1,19 @@ +name: daliuge-daemon +dlg_root_on_cluster_nodes: /dlg +dlg_root_in_container: /dlg +containers: + name: icrar/daliuge-engine:2.0.1 + ports: + containerPort: 9000 + deploymentPort: 8001 + nodemanagerPort: 8000 +service: + daemon: + name: daemon-port + port: 9000 + deployment: + name: island-port + port: 8001 + nodemgr: + name: node-manager-port + port: 8000 \ No newline at end of file diff --git a/daliuge-translator/dlg/dropmake/pg_generator.py b/daliuge-translator/dlg/dropmake/pg_generator.py index 18f6c1ec1..54f00ab80 100644 --- a/daliuge-translator/dlg/dropmake/pg_generator.py +++ b/daliuge-translator/dlg/dropmake/pg_generator.py @@ -542,12 +542,19 @@ def make_oid(self, iid="0"): return "{0}_{1}_{2}".format(self._ssid, self.id, iid), rank def _update_key_value_attributes(self, kwargs): + # NOTE: We should really just pass all of these on un-altered and finally drop + # support for the Arg%02d arguments. 
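# Illustration only, not part of the patch: a minimal, self-contained sketch of the
# applicationArgs plumbing this hunk introduces (see also bash_shell_app.py), using
# the sample values from test/graphs/ddTest.graph. The inline serializer is merely a
# stand-in for droputils.serialize_applicationArgs, whose implementation is outside
# this hunk.

jd = {"fields": [{"name": "num_cpus", "value": "1"}],
      "applicationArgs": [{"name": "if", "value": "%i0"},
                          {"name": "of", "value": "%o0"},
                          {"name": "count", "value": 10},
                          {"name": "bs", "value": 1024}]}

# Translator side: "fields" entries become top-level kwargs, while
# "applicationArgs" entries are collected into a dict of their own.
kwargs = {je["name"]: je["value"] for je in jd["fields"]}
kwargs["applicationArgs"] = {je["name"]: je["value"]
                             for je in jd["applicationArgs"]}

# Engine side: each name/value pair is joined using the configured argumentPrefix
# and paramValueSeparator; ddTest.graph sets prefix="" and separator="=", so the
# resulting invocation is exactly the dd call the test expects.
args = " ".join(f"{name}={value}"
                for name, value in kwargs["applicationArgs"].items())
assert "dd " + args == "dd if=%i0 of=%o0 count=10 bs=1024"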
# get the arguments from new fields dictionary in a backwards compatible way if "fields" in self.jd: for je in self.jd["fields"]: # The field to be used is not the text, but the name field self.jd[je["name"]] = je["value"] kwargs[je["name"]] = je["value"] + kwargs["applicationArgs"] = {} # make sure the dict always exists downstream + if "applicationArgs" in self.jd: # and fill it if provided + for je in self.jd["applicationArgs"]: + self.jd[je["name"]] = je["value"] + kwargs["applicationArgs"][je["name"]] = je["value"] for i in range(10): k = "Arg%02d" % (i + 1) if k not in self.jd: @@ -624,13 +631,43 @@ def _create_test_drop_spec(self, oid, rank, kwargs) -> dropdict: kwargs["filepath"] = fp self._update_key_value_attributes(kwargs) drop_spec.update(kwargs) - elif drop_type in [Categories.COMPONENT, Categories.PYTHON_APP, Categories.BRANCH]: + elif drop_type in [Categories.COMPONENT, Categories.PYTHON_APP, Categories.BRANCH, Categories.DOCKER]: # default generic component becomes "sleep and copy" - if "appclass" not in self.jd or len(self.jd["appclass"]) == 0: - app_class = "dlg.apps.simple.SleepApp" + if drop_type not in [Categories.DOCKER]: + if "appclass" not in self.jd or len(self.jd["appclass"]) == 0: + app_class = "dlg.apps.simple.SleepApp" + else: + app_class = self.jd["appclass"] else: - app_class = self.jd["appclass"] - + # deal with the Docker specific component params + app_class = "dlg.apps.dockerapp.DockerApp" + typ = DropType.APP + image = str(self.jd.get("image")) + if image == "": + raise GraphException("Missing image for Docker component '%s'" % self.text) + + command = str(self.jd.get("command")) + # There ARE containers which don't need/want a command + # if command == "": + # raise GraphException("Missing command for Construct '%s'" % self.text) + + kwargs["image"] = image + kwargs["command"] = command + # TODO: User inside docker should follow user of engine. + kwargs["user"] = str(self.jd.get("user", "")) + kwargs["ensureUserAndSwitch"] = self.str_to_bool( + str(self.jd.get("ensureUserAndSwitch", "0")) + ) + kwargs["removeContainer"] = self.str_to_bool( + str(self.jd.get("removeContainer", "1")) + ) + kwargs["additionalBindings"] = str(self.jd.get("additionalBindings", "")) + if kwargs["additionalBindings"]: + kwargs["additionalBindings"] += "," + # always mount DLG_ROOT directory. 
ENV variable is only known in engine + kwargs["additionalBindings"] += "${DLG_ROOT}:${DLG_ROOT}" + kwargs["portMappings"] = str(self.jd.get("portMappings", "")) + kwargs["shmSize"] = str(self.jd.get("shmSize","")) if "execution_time" in self.jd: execTime = int(self.jd["execution_time"]) if execTime < 0: @@ -656,7 +693,7 @@ def _create_test_drop_spec(self, oid, rank, kwargs) -> dropdict: kwargs["num_cpus"] = int(self.jd.get("num_cpus", 1)) if "mkn" in self.jd: kwargs["mkn"] = self.jd["mkn"] - self._update_key_value_attributes(kwargs) + self._update_key_value_attributes(kwargs) # pass on all other kw-value pairs drop_spec.update(kwargs) elif drop_type in [Categories.DYNLIB_APP, Categories.DYNLIB_PROC_APP]: @@ -696,61 +733,44 @@ def _create_test_drop_spec(self, oid, rank, kwargs) -> dropdict: ) # add more arguments cmds = [] - for i in range(10): - k = "Arg%02d" % (i + 1,) - if k not in self.jd: - k = "arg%02d" % (i + 1,) - if k not in self.jd: - continue - v = self.jd[k] - if v is not None and len(str(v)) > 0: - cmds.append(str(v)) - # add more arguments - this is the new method of adding arguments in EAGLE - # the method above (Arg**) is retained for compatibility, but eventually should be removed - for k in [ - "command", - "input_redirection", - "output_redirection", - "command_line_arguments", - ]: - if k in self.jd: - cmds.append(self.jd[k]) - # kwargs['command'] = ' '.join(cmds) - kwargs["command"] = BashCommand(cmds) + if "command" in self.jd: + cmds = [self.jd["command"]] + self._update_key_value_attributes(kwargs) # get all the other params + kwargs["command"] = BashCommand(cmds) # NOTE: Not really required anymore? kwargs["num_cpus"] = int(self.jd.get("num_cpus", 1)) drop_spec.update(kwargs) - elif drop_type == Categories.DOCKER: - # Docker application. - app_class = "dlg.apps.dockerapp.DockerApp" - typ = DropType.APP - drop_spec = dropdict( - {"oid": oid, "type": typ, "app": app_class, "rank": rank} - ) - - image = str(self.jd.get("image")) - if image == "": - raise GraphException("Missing image for Construct '%s'" % self.text) - - command = str(self.jd.get("command")) - # There ARE containers which don't need/want a command - # if command == "": - # raise GraphException("Missing command for Construct '%s'" % self.text) - - kwargs["tw"] = int(self.jd.get("execution_time", "5")) - kwargs["image"] = image - kwargs["command"] = command - kwargs["user"] = str(self.jd.get("user", "")) - kwargs["ensureUserAndSwitch"] = self.str_to_bool( - str(self.jd.get("ensureUserAndSwitch", "0")) - ) - kwargs["removeContainer"] = self.str_to_bool( - str(self.jd.get("removeContainer", "1")) - ) - kwargs["additionalBindings"] = str(self.jd.get("additionalBindings", "")) - kwargs["portMappings"] = str(self.jd.get("portMappings", "")) - kwargs["shmSize"] = str(self.jd.get("shmSize","")) - drop_spec.update(kwargs) + # elif drop_type == Categories.DOCKER: + # # Docker application. 
+ # app_class = "dlg.apps.dockerapp.DockerApp" + # typ = DropType.APP + # drop_spec = dropdict( + # {"oid": oid, "type": typ, "app": app_class, "rank": rank} + # ) + + # image = str(self.jd.get("image")) + # if image == "": + # raise GraphException("Missing image for Construct '%s'" % self.text) + + # command = str(self.jd.get("command")) + # # There ARE containers which don't need/want a command + # # if command == "": + # # raise GraphException("Missing command for Construct '%s'" % self.text) + + # kwargs["tw"] = int(self.jd.get("execution_time", "5")) + # kwargs["image"] = image + # kwargs["command"] = command + # kwargs["user"] = str(self.jd.get("user", "")) + # kwargs["ensureUserAndSwitch"] = self.str_to_bool( + # str(self.jd.get("ensureUserAndSwitch", "0")) + # ) + # kwargs["removeContainer"] = self.str_to_bool( + # str(self.jd.get("removeContainer", "1")) + # ) + # kwargs["additionalBindings"] = str(self.jd.get("additionalBindings", "")) + # kwargs["portMappings"] = str(self.jd.get("portMappings", "")) + # kwargs["shmSize"] = str(self.jd.get("shmSize","")) + # drop_spec.update(kwargs) elif drop_type == Categories.GROUP_BY: drop_spec = dropdict( diff --git a/daliuge-translator/dlg/dropmake/web/lg_web.py b/daliuge-translator/dlg/dropmake/web/lg_web.py index cb44b3412..4819432cf 100644 --- a/daliuge-translator/dlg/dropmake/web/lg_web.py +++ b/daliuge-translator/dlg/dropmake/web/lg_web.py @@ -60,6 +60,7 @@ # Patched to be larger to accomodate large config drops bottle.BaseRequest.MEMFILE_MAX = 1024 * 512 + def file_as_string(fname, enc="utf8"): b = pkg_resources.resource_string(__name__, fname) # @UndefinedVariable return common.b2s(b, enc) @@ -290,6 +291,36 @@ def get_schedule_mat(): return "Failed to get schedule matrices for {0}: {1}".format(pgt_id, ex) +@get("/gen_pg_helm") +def gen_pg_helm(): + """ + RESTful interface to deploy a PGT as a K8s helm chart. 
+ """ + # Get pgt_data + from ...deploy.start_helm_cluster import start_helm + pgt_id = request.query.get("pgt_id") + pgtp = pg_mgr.get_pgt(pgt_id) + if pgtp is None: + response.status = 404 + return "PGT(P) with id {0} not found in the Physical Graph Manager".format( + pgt_id + ) + + pgtpj = pgtp._gojs_json_obj + logger.info("PGTP: %s" % pgtpj) + num_partitions = len(list(filter(lambda n: 'isGroup' in n, pgtpj['nodeDataArray']))) + # Send pgt_data to helm_start + try: + start_helm(pgtp, num_partitions, pgt_dir) + except restutils.RestClientException as ex: + response.status = 500 + print(traceback.format_exc()) + return "Fail to deploy physical graph: {0}".format(ex) + # TODO: Not sure what to redirect to yet + response.status = 200 + return "Inspect your k8s dashboard for deployment status" + + @get("/gen_pg") def gen_pg(): """ @@ -310,7 +341,7 @@ def gen_pg(): pgtpj = pgtp._gojs_json_obj logger.info("PGTP: %s" % pgtpj) num_partitions = 0 - num_partitions = len(list(filter(lambda n:'isGroup' in n, pgtpj['nodeDataArray']))) + num_partitions = len(list(filter(lambda n: 'isGroup' in n, pgtpj['nodeDataArray']))) surl = urlparse(request.url) mhost = "" @@ -475,7 +506,7 @@ def gen_pgt(): pgt_view_json_name=pgt_id, partition_info=part_info, title="Physical Graph Template%s" - % ("" if num_partitions == 0 else "Partitioning"), + % ("" if num_partitions == 0 else "Partitioning"), ) except GraphException as ge: response.status = 500 diff --git a/daliuge-translator/docker/Dockerfile.dev b/daliuge-translator/docker/Dockerfile.dev index d5bc0be59..e081213c0 100644 --- a/daliuge-translator/docker/Dockerfile.dev +++ b/daliuge-translator/docker/Dockerfile.dev @@ -7,13 +7,14 @@ ARG BUILD_ID FROM icrar/daliuge-common:${VCS_TAG} LABEL stage=builder LABEL build=$BUILD_ID -RUN apt-get update && \ - apt-get clean && \ - apt install -y gcc python3-venv python3-distutils +# all dependencies are already installed in daliuge-common +# RUN apt-get update && \ +# apt-get clean && \ +# apt install -y gcc python3-venv python3-distutils COPY / /daliuge -RUN . /root/dlg/bin/activate && \ +RUN . /dlg/bin/activate && \ cd /daliuge && \ pip3 install wheel && \ pip3 install . @@ -21,11 +22,11 @@ RUN . /root/dlg/bin/activate && \ # Second stage build taking what's required from first stage FROM icrar/daliuge-common:${VCS_TAG} COPY --from=0 /daliuge/. /daliuge/. -COPY --from=0 /root/dlg /root/dlg -RUN apt-get update && apt-get install -y libmetis-dev python3 +COPY --from=0 /dlg /dlg +# RUN apt-get update && apt-get install -y libmetis-dev python3 # enable the virtualenv path from daliuge-common -ENV VIRTUAL_ENV=/root/dlg +ENV VIRTUAL_ENV=/dlg ENV PATH="$VIRTUAL_ENV/bin:$PATH" EXPOSE 8084 diff --git a/daliuge-translator/run_translator.sh b/daliuge-translator/run_translator.sh index 0eca0ea05..c79557e9b 100755 --- a/daliuge-translator/run_translator.sh +++ b/daliuge-translator/run_translator.sh @@ -7,12 +7,12 @@ case "$1" in "dev") export VCS_TAG=`git rev-parse --abbrev-ref HEAD| tr '[:upper:]' '[:lower:]'` echo "Running Translator development version in foreground..." 
-        docker run --volume $PWD/dlg/dropmake:/root/dlg/lib/python3.8/site-packages/dlg/dropmake --name daliuge-translator --rm -t -p 8084:8084 icrar/daliuge-translator:${VCS_TAG}
+        docker run --volume $PWD/dlg/dropmake:/dlg/lib/python3.8/site-packages/dlg/dropmake --name daliuge-translator --rm -t -p 8084:8084 icrar/daliuge-translator:${VCS_TAG}
         exit 0;;
     "casa")
         export VCS_TAG=`git rev-parse --abbrev-ref HEAD| tr '[:upper:]' '[:lower:]'`-casa
         echo "Running Translator development version in foreground..."
-        docker run --volume $PWD/dlg/dropmake:/root/dlg/lib/python3.8/site-packages/dlg/dropmake --name daliuge-translator --rm -t -p 8084:8084 icrar/daliuge-translator:${VCS_TAG}
+        docker run --volume $PWD/dlg/dropmake:/dlg/lib/python3.8/site-packages/dlg/dropmake --name daliuge-translator --rm -t -p 8084:8084 icrar/daliuge-translator:${VCS_TAG}
         exit 0;;
     *)
         echo "Usage run_translator.sh <dev|casa>"
diff --git a/daliuge-translator/test-requirements.txt b/daliuge-translator/test-requirements.txt
index 9fe7ad47d..0ad5db537 100644
--- a/daliuge-translator/test-requirements.txt
+++ b/daliuge-translator/test-requirements.txt
@@ -1,3 +1,4 @@
 gitpython
 ruamel.yaml==0.16.0; python_version=='2.7'
 typing>=3.7.4
+pyyaml>=6.0
diff --git a/daliuge-translator/test/dropmake/logical_graphs/SharedMemoryTest.graph b/daliuge-translator/test/dropmake/logical_graphs/SharedMemoryTest.graph
index 715b0916f..246fa21f9 100644
--- a/daliuge-translator/test/dropmake/logical_graphs/SharedMemoryTest.graph
+++ b/daliuge-translator/test/dropmake/logical_graphs/SharedMemoryTest.graph
@@ -68,7 +68,7 @@
     },
     "nodeDataArray": [
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "PythonApp",
             "collapsed": false,
             "color": "#0059a5",
@@ -154,7 +154,7 @@
             "y": 563
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "SharedMemory",
             "collapsed": false,
             "color": "#2c2c2c",
@@ -229,7 +229,7 @@
             "y": 565
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "PythonApp",
             "collapsed": false,
             "color": "#0059a5",
@@ -324,7 +324,7 @@
             "y": 395
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
            "category": "PythonApp",
             "collapsed": false,
             "color": "#0059a5",
@@ -419,7 +419,7 @@
             "y": 653
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "PythonApp",
             "collapsed": false,
             "color": "#0059a5",
@@ -514,7 +514,7 @@
             "y": 525
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "SharedMemory",
             "collapsed": false,
             "color": "#2c2c2c",
@@ -580,7 +580,7 @@
             "y": 384
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "SharedMemory",
             "collapsed": false,
             "color": "#2c2c2c",
@@ -646,7 +646,7 @@
             "y": 532
         },
         {
-            "applicationParams": [],
+            "applicationArgs": [],
             "category": "SharedMemory",
             "collapsed": false,
             "color": "#2c2c2c",
diff --git a/daliuge-translator/test/dropmake/logical_graphs/eagle_gather.graph b/daliuge-translator/test/dropmake/logical_graphs/eagle_gather.graph
index 6acdd1dc8..4927b98ae 100644
--- a/daliuge-translator/test/dropmake/logical_graphs/eagle_gather.graph
+++ b/daliuge-translator/test/dropmake/logical_graphs/eagle_gather.graph
@@ -85,7 +85,7 @@
             "precious": false
         }
     ],
-    "applicationParams": [],
+    "applicationArgs": [],
     "inputAppFields": [],
     "outputAppFields": [],
     "exitAppFields": [],
@@ -175,7 +175,7 @@
             "precious": false
         }
     ],
-    "applicationParams": [],
+    "applicationArgs": [],
     "inputAppFields": [
         {
             "text": "Execution time",
@@ -499,7 +499,7 @@
             "precious": false
         }
     ],
-    "applicationParams": [],
+    "applicationArgs": [],
     "inputAppFields": [],
    "outputAppFields":
[], "exitAppFields": [], @@ -582,7 +582,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -665,7 +665,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -857,7 +857,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -969,7 +969,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1059,7 +1059,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [ { "text": "Execution time", @@ -1242,7 +1242,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1354,7 +1354,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1547,7 +1547,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1630,7 +1630,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1713,7 +1713,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], diff --git a/daliuge-translator/test/dropmake/logical_graphs/eagle_gather_empty.graph b/daliuge-translator/test/dropmake/logical_graphs/eagle_gather_empty.graph index de62bbe3b..dcf44d3c6 100644 --- a/daliuge-translator/test/dropmake/logical_graphs/eagle_gather_empty.graph +++ b/daliuge-translator/test/dropmake/logical_graphs/eagle_gather_empty.graph @@ -85,7 +85,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -175,7 +175,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [ { "text": "Execution time", @@ -499,7 +499,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -582,7 +582,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -665,7 +665,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -857,7 +857,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -969,7 +969,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1059,7 +1059,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [ { "text": "Execution time", @@ -1242,7 +1242,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1324,7 +1324,7 @@ "precious": false } ], - "applicationParams": [], + 
"applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], diff --git a/daliuge-translator/test/dropmake/logical_graphs/lofar_std.graph b/daliuge-translator/test/dropmake/logical_graphs/lofar_std.graph index 27c27ec26..328942c5f 100644 --- a/daliuge-translator/test/dropmake/logical_graphs/lofar_std.graph +++ b/daliuge-translator/test/dropmake/logical_graphs/lofar_std.graph @@ -56,7 +56,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -116,7 +116,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -168,7 +168,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -228,7 +228,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -312,7 +312,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -385,7 +385,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -458,7 +458,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -533,7 +533,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -606,7 +606,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -679,7 +679,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -852,7 +852,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1025,7 +1025,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1098,7 +1098,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1287,7 +1287,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1476,7 +1476,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1549,7 +1549,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1622,7 +1622,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1696,7 +1696,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1869,7 +1869,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], 
"outputAppFields": [], "exitAppFields": [], @@ -2050,7 +2050,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2132,7 +2132,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2205,7 +2205,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2260,7 +2260,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2337,7 +2337,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2509,7 +2509,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2561,7 +2561,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2733,7 +2733,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2806,7 +2806,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -2970,7 +2970,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3051,7 +3051,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3224,7 +3224,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3294,7 +3294,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3477,7 +3477,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3532,7 +3532,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3584,7 +3584,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3658,7 +3658,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3832,7 +3832,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -3915,7 +3915,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], diff --git a/daliuge-translator/test/dropmake/logical_graphs/test-20190830-110556.graph b/daliuge-translator/test/dropmake/logical_graphs/test-20190830-110556.graph index e6f6d80be..a2b286d24 100644 --- 
a/daliuge-translator/test/dropmake/logical_graphs/test-20190830-110556.graph +++ b/daliuge-translator/test/dropmake/logical_graphs/test-20190830-110556.graph @@ -1,415 +1,320 @@ { + "linkDataArray": [ + { + "from": -2, + "fromPort": "47c421b8-5cdc-4ff7-ab7e-b75140f2d951", + "loop_aware": "0", + "to": -4, + "toPort": "c05689f4-6c5a-47dc-b3b1-fcbdfa09e4df" + } + ], "modelData": { - "fileType": "", - "repoService": "GitHub", + "eagleCommitHash": "dce847d911db5b8d2245775bb6d5c719eb4aa061", + "eagleVersion": "v4.1.0", + "filePath": "test-20190830-110556.graph", + "fileType": "Graph", + "gitUrl": "", + "lastModifiedDatetime": 0, + "lastModifiedEmail": "", + "lastModifiedName": "", + "readonly": true, + "repo": "", "repoBranch": "", - "repo": "james-strauss-uwa/eagle-test", - "filePath": "test/test-20190830-110556.graph", - "sha": "", - "git_url": "" + "repoService": "Unknown", + "schemaVersion": "OJS", + "sha": "" }, "nodeDataArray": [ { - "category": "Memory", - "categoryType": "Data", - "isData": true, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, - "color": "#394BB2", - "drawOrderHint": 0, - "key": -1, - "text": "Enter label", - "description": "", - "x": 200, - "y": 100, - "width": 200, - "height": 200, - "collapsed": false, - "showPorts": false, - "streaming": false, - "subject": null, - "selected": false, - "expanded": false, - "inputApplicationName": "", - "outputApplicationName": "", - "exitApplicationName": "", - "inputApplicationType": "None", - "outputApplicationType": "None", - "exitApplicationType": "None", - "inputPorts": [], - "outputPorts": [ - { - "Id": "ab5ada14-04d7-4b03-816d-c43428d4e2e4", - "IdText": "event" - } - ], - "inputLocalPorts": [], - "outputLocalPorts": [], - "inputAppFields": [], - "outputAppFields": [], - "fields": [ - { - "text": "Data volume", - "name": "data_volume", - "value": "5", - "description": "" - }, - { - "text": "Group end", - "name": "group_end", - "value": "0", - "description": "" - } - ] - }, - { + "applicationArgs": [], "category": "BashShellApp", - "categoryType": "Application", - "isData": false, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, - "color": "#1C2833", - "drawOrderHint": 0, - "key": -2, - "text": "Enter label", + "collapsed": true, + "color": "#0059a5", "description": "", - "x": 400, - "y": 200, - "width": 200, - "height": 200, - "collapsed": false, - "showPorts": false, - "streaming": false, - "subject": null, - "selected": false, + "drawOrderHint": 0, "expanded": false, - "inputApplicationName": "", - "outputApplicationName": "", - "exitApplicationName": "", - "inputApplicationType": "None", - "outputApplicationType": "None", - "exitApplicationType": "None", - "inputPorts": [ - { - "Id": "c12aa833-43a9-4c1e-abaa-c77396010a31", - "IdText": "event" - } - ], - "outputPorts": [ - { - "Id": "47c421b8-5cdc-4ff7-ab7e-b75140f2d951", - "IdText": "event" - } - ], - "inputLocalPorts": [], - "outputLocalPorts": [], - "inputAppFields": [], - "outputAppFields": [], "fields": [ { - "text": "Execution time", + "defaultValue": "", + "description": "", "name": "execution_time", - "value": "5", - "description": "" + "precious": false, + "readonly": false, + "text": "Execution time", + "type": "Unknown", + "value": "5" }, { - "text": "Num CPUs", + "defaultValue": "", + "description": "", "name": "num_cpus", - "value": "1", - "description": "" + "precious": false, + "readonly": false, + "text": "Num CPUs", + "type": "Unknown", + "value": "1" }, { - "text": "Group start", + "defaultValue": "", + 
"description": "", "name": "group_start", - "value": "0", - "description": "" + "precious": false, + "readonly": false, + "text": "Group start", + "type": "Unknown", + "value": "0" }, { - "text": "Arg01", + "defaultValue": "", + "description": "", "name": "Arg01", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg01", + "type": "Unknown", + "value": "" }, { - "text": "Arg02", + "defaultValue": "", + "description": "", "name": "Arg02", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg02", + "type": "Unknown", + "value": "" }, { - "text": "Arg03", + "defaultValue": "", + "description": "", "name": "Arg03", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg03", + "type": "Unknown", + "value": "" }, { - "text": "Arg04", + "defaultValue": "", + "description": "", "name": "Arg04", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg04", + "type": "Unknown", + "value": "" }, { - "text": "Arg05", + "defaultValue": "", + "description": "", "name": "Arg05", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg05", + "type": "Unknown", + "value": "" }, { - "text": "Arg06", + "defaultValue": "", + "description": "", "name": "Arg06", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg06", + "type": "Unknown", + "value": "" }, { - "text": "Arg07", + "defaultValue": "", + "description": "", "name": "Arg07", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg07", + "type": "Unknown", + "value": "" }, { - "text": "Arg08", + "defaultValue": "", + "description": "", "name": "Arg08", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg08", + "type": "Unknown", + "value": "" }, { - "text": "Arg09", + "defaultValue": "", + "description": "", "name": "Arg09", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg09", + "type": "Unknown", + "value": "" }, { - "text": "Arg10", + "defaultValue": "", + "description": "", "name": "Arg10", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Arg10", + "type": "Unknown", + "value": "" + }, + { + "defaultValue": "echo Hello", + "description": "just a dummy command", + "name": "command", + "precious": false, + "readonly": false, + "text": "command", + "type": "String", + "value": "echo Hello" } - ] - }, - { - "category": "Component", - "categoryType": "Application", - "isData": false, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, - "color": "#3498DB", - "drawOrderHint": 0, - "key": -3, - "text": "Enter label", - "description": "", - "x": 600, - "y": 300, - "width": 200, - "height": 200, - "collapsed": false, - "showPorts": false, - "streaming": false, - "subject": null, - "selected": false, - "expanded": false, + ], + "flipPorts": false, + "git_url": "", + "height": 72, + "inputAppFields": [], + "inputApplicationKey": null, "inputApplicationName": "", - "outputApplicationName": "", - "exitApplicationName": "", "inputApplicationType": "None", - "outputApplicationType": "None", - "exitApplicationType": "None", + "inputLocalPorts": [], "inputPorts": [ { - "Id": "0178b7ce-79ed-406d-9e4b-ee2a53c168ec", - "IdText": "event" + "Id": "c12aa833-43a9-4c1e-abaa-c77396010a31", + "IdText": "event", + "description": "", + "event": false, + "text": "", + "type": "" } ], + 
"isGroup": false, + "key": -2, + "outputAppFields": [], + "outputApplicationKey": null, + "outputApplicationName": "", + "outputApplicationType": "None", + "outputLocalPorts": [], "outputPorts": [ { - "Id": "f487361b-f633-43ba-978d-c65d7a52c34d", - "IdText": "event" + "Id": "47c421b8-5cdc-4ff7-ab7e-b75140f2d951", + "IdText": "event", + "description": "", + "event": false, + "text": "", + "type": "" } ], - "inputLocalPorts": [], - "outputLocalPorts": [], - "inputAppFields": [], - "outputAppFields": [], - "fields": [ - { - "text": "Execution time", - "name": "execution_time", - "value": "5", - "description": "" - }, - { - "text": "Num CPUs", - "name": "num_cpus", - "value": "1", - "description": "" - }, - { - "text": "Group start", - "name": "group_start", - "value": "0", - "description": "" - }, - { - "text": "Appclass", - "name": "appclass", - "value": "test.graphsRepository", - "description": "" - }, - { - "text": "Arg01", - "name": "Arg01", - "value": "", - "description": "" - }, - { - "text": "Arg02", - "name": "Arg02", - "value": "", - "description": "" - }, - { - "text": "Arg03", - "name": "Arg03", - "value": "", - "description": "" - }, - { - "text": "Arg04", - "name": "Arg04", - "value": "", - "description": "" - }, - { - "text": "Arg05", - "name": "Arg05", - "value": "", - "description": "" - }, - { - "text": "Arg06", - "name": "Arg06", - "value": "", - "description": "" - }, - { - "text": "Arg07", - "name": "Arg07", - "value": "", - "description": "" - }, - { - "text": "Arg08", - "name": "Arg08", - "value": "", - "description": "" - }, - { - "text": "Arg09", - "name": "Arg09", - "value": "", - "description": "" - }, - { - "text": "Arg10", - "name": "Arg10", - "value": "", - "description": "" - } - ] + "precious": false, + "readonly": true, + "sha": "", + "streaming": false, + "subject": null, + "text": "Enter label", + "width": 200, + "x": 400, + "y": 200 }, { + "applicationArgs": [], "category": "File", - "categoryType": "Data", - "isData": true, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, - "color": "#394BB2", - "drawOrderHint": 0, - "key": -4, - "text": "Enter label", + "collapsed": true, + "color": "#2c2c2c", "description": "", - "x": 800, - "y": 400, - "width": 200, - "height": 200, - "collapsed": false, - "showPorts": false, - "streaming": false, - "subject": null, - "selected": false, + "drawOrderHint": 0, "expanded": false, - "inputApplicationName": "", - "outputApplicationName": "", - "exitApplicationName": "", - "inputApplicationType": "None", - "outputApplicationType": "None", - "exitApplicationType": "None", - "inputPorts": [ - { - "Id": "c05689f4-6c5a-47dc-b3b1-fcbdfa09e4df", - "IdText": "event" - } - ], - "outputPorts": [], - "inputLocalPorts": [], - "outputLocalPorts": [], - "inputAppFields": [], - "outputAppFields": [], "fields": [ { - "text": "Data volume", + "defaultValue": "", + "description": "", "name": "data_volume", - "value": "5", - "description": "" + "precious": false, + "readonly": false, + "text": "Data volume", + "type": "Unknown", + "value": "5" }, { - "text": "Group end", + "defaultValue": "", + "description": "", "name": "group_end", - "value": "0", - "description": "" + "precious": false, + "readonly": false, + "text": "Group end", + "type": "Unknown", + "value": "0" }, { - "text": "Check file path exists", + "defaultValue": "", + "description": "", "name": "check_filepath_exists", - "value": "1", - "description": "" + "precious": false, + "readonly": false, + "text": "Check file path exists", + "type": "Unknown", + 
"value": "1" }, { - "text": "File path", + "defaultValue": "", + "description": "", "name": "filepath", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "File path", + "type": "Unknown", + "value": "" }, { - "text": "Directory name", + "defaultValue": "", + "description": "", "name": "dirname", - "value": "", - "description": "" + "precious": false, + "readonly": false, + "text": "Directory name", + "type": "Unknown", + "value": "" } - ] - } - ], - "linkDataArray": [ - { - "from": -1, - "fromPort": "ab5ada14-04d7-4b03-816d-c43428d4e2e4", - "to": -2, - "toPort": "c12aa833-43a9-4c1e-abaa-c77396010a31" - }, - { - "from": -2, - "fromPort": "47c421b8-5cdc-4ff7-ab7e-b75140f2d951", - "to": -3, - "toPort": "0178b7ce-79ed-406d-9e4b-ee2a53c168ec" - }, - { - "from": -3, - "fromPort": "f487361b-f633-43ba-978d-c65d7a52c34d", - "to": -4, - "toPort": "c05689f4-6c5a-47dc-b3b1-fcbdfa09e4df" + ], + "flipPorts": false, + "git_url": "", + "height": 72, + "inputAppFields": [], + "inputApplicationKey": null, + "inputApplicationName": "", + "inputApplicationType": "None", + "inputLocalPorts": [], + "inputPorts": [ + { + "Id": "c05689f4-6c5a-47dc-b3b1-fcbdfa09e4df", + "IdText": "event", + "description": "", + "event": false, + "text": "", + "type": "" + } + ], + "isGroup": false, + "key": -4, + "outputAppFields": [], + "outputApplicationKey": null, + "outputApplicationName": "", + "outputApplicationType": "None", + "outputLocalPorts": [], + "outputPorts": [], + "precious": false, + "readonly": true, + "sha": "", + "streaming": false, + "subject": null, + "text": "Enter label", + "width": 200, + "x": 800, + "y": 400 } ] } \ No newline at end of file diff --git a/daliuge-translator/test/dropmake/logical_graphs/testLoop.graph b/daliuge-translator/test/dropmake/logical_graphs/testLoop.graph index 2ed50cbd1..9dbb8c973 100644 --- a/daliuge-translator/test/dropmake/logical_graphs/testLoop.graph +++ b/daliuge-translator/test/dropmake/logical_graphs/testLoop.graph @@ -1,414 +1,426 @@ { + "linkDataArray": [ + { + "from": -4, + "fromPort": "1765f421-782c-4972-ab76-ac0c23bd95e3", + "loop_aware": "1", + "to": -3, + "toPort": "102d6fc6-eea7-434b-9196-65a42a79cf13" + }, + { + "from": -3, + "fromPort": "b8739916-45c0-41e1-b38d-afede242bcfd", + "loop_aware": "1", + "to": -2, + "toPort": "b133f803-42f7-4c70-83b7-803bf8ec3035" + } + ], "modelData": { + "eagleCommitHash": "dce847d911db5b8d2245775bb6d5c719eb4aa061", + "eagleVersion": "v4.1.0", + "filePath": "testLoop.graph", "fileType": "Graph", - "repoService": "GitHub", - "repoBranch": "yan-812-2", - "repo": "ICRAR/daliuge", - "filePath": "daliuge-translator/test/dropmake/logical_graphs/testLoop.graph", - "eagleVersion": "Unknown", - "eagleCommitHash": "Unknown", - "schemaVersion": "OJS", - "readonly": true, - "sha": "", "gitUrl": "", - "lastModifiedName": "", + "lastModifiedDatetime": 1644027730, "lastModifiedEmail": "", - "lastModifiedDatetime": "" + "lastModifiedName": "", + "readonly": true, + "repo": "", + "repoBranch": "", + "repoService": "Unknown", + "schemaVersion": "OJS", + "sha": "" }, "nodeDataArray": [ { + "applicationArgs": [], "category": "Loop", - "isData": false, - "isGroup": true, - "canHaveInputs": false, - "canHaveOutputs": false, + "collapsed": false, "color": "rgb(221, 173, 0)", - "drawOrderHint": 0, - "key": -1, - "text": "Loop", "description": "Placeholder 'loop' description", - "x": 606.4859101595122, - "y": 337.06816127722925, - "width": 385.98717625064404, - "height": 378.12856316963075, - "collapsed": false, - 
"flipPorts": false, - "streaming": false, - "precious": false, - "subject": null, + "drawOrderHint": 0, "expanded": true, - "readonly": true, - "git_url": "", - "sha": "", - "inputPorts": [ + "fields": [ { - "Id": "123e02e1-0e35-47a3-8dca-2525f71a6766", - "IdText": "event", - "text": "", - "event": false, - "type": "", - "description": "" + "defaultValue": "", + "description": "", + "name": "num_of_iter", + "precious": false, + "readonly": false, + "text": "Number loops", + "type": "Unknown", + "value": "5" } ], - "outputPorts": [ + "flipPorts": false, + "git_url": "", + "height": 378.12856316963075, + "inputAppFields": [], + "inputApplicationKey": -5, + "inputApplicationName": "event", + "inputApplicationType": "UnknownApplication", + "inputLocalPorts": [ { "Id": "1eeee75a-c2d6-49ea-b94b-5c208b8f3f0c", "IdText": "event", - "text": "", + "description": "", "event": false, - "type": "", - "description": "" - } - ], - "inputLocalPorts": [ + "text": "", + "type": "" + }, { "Id": "a21aba04-cf68-4dab-b57f-dd0dba9e4c91", "IdText": "event", - "text": "", + "description": "", "event": false, - "type": "", - "description": "" - } - ], - "outputLocalPorts": [ - { - "Id": "05863afc-5933-4eef-9f64-5e10a4b86ac6", - "IdText": "event", "text": "", - "event": false, - "type": "", - "description": "" + "type": "" } ], - "fields": [ + "inputPorts": [ { - "text": "Number loops", - "name": "num_of_iter", - "value": "5", - "defaultValue": "", + "Id": "123e02e1-0e35-47a3-8dca-2525f71a6766", + "IdText": "event", "description": "", - "readonly": false, - "type": "Unknown", - "precious": false + "event": false, + "text": "", + "type": "" } ], - "applicationParams": [], - "inputAppFields": [], + "isGroup": true, + "key": -1, "outputAppFields": [], - "exitAppFields": [], - "inputApplicationName": "event", - "inputApplicationType": "UnknownApplication", - "inputApplicationKey": -5, + "outputApplicationKey": null, "outputApplicationName": "", "outputApplicationType": "None", - "outputApplicationKey": null, - "exitApplicationName": "event", - "exitApplicationType": "UnknownApplication", - "exitApplicationKey": -6 + "outputLocalPorts": [], + "outputPorts": [], + "precious": false, + "readonly": true, + "sha": "", + "streaming": false, + "subject": null, + "text": "Loop", + "width": 385.98717625064404, + "x": 606.4859101595122, + "y": 337.06816127722925 }, { + "applicationArgs": [], "category": "BashShellApp", - "isData": false, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, + "collapsed": true, "color": "#0059a5", - "drawOrderHint": 0, - "key": -2, - "text": "SleepExternal", "description": "An application component run within the Bash Shell", - "x": 736.1530259962292, - "y": 855.7366246240957, - "width": 200, - "height": 72, - "collapsed": true, - "flipPorts": false, - "streaming": false, - "precious": false, - "subject": null, + "drawOrderHint": 0, "expanded": false, - "readonly": true, - "git_url": "", - "sha": "", - "inputPorts": [ - { - "Id": "b133f803-42f7-4c70-83b7-803bf8ec3035", - "IdText": "event", - "text": "", - "event": false, - "type": "", - "description": "" - } - ], - "outputPorts": [ - { - "Id": "1765f421-782c-4972-ab76-ac0c23bd95e3", - "IdText": "event", - "text": "", - "event": false, - "type": "", - "description": "" - } - ], - "inputLocalPorts": [], - "outputLocalPorts": [], "fields": [ { - "text": "Execution time", - "name": "execution_time", - "value": "5", "defaultValue": "", "description": "", + "name": "execution_time", + "precious": false, "readonly": false, + "text": 
"Execution time", "type": "Unknown", - "precious": false + "value": "5" }, { - "text": "Num CPUs", - "name": "num_cpus", - "value": "1", "defaultValue": "", "description": "", + "name": "num_cpus", + "precious": false, "readonly": false, + "text": "Num CPUs", "type": "Unknown", - "precious": false + "value": "1" }, { - "text": "Group start", - "name": "group_start", - "value": "0", "defaultValue": "", "description": "", + "name": "group_start", + "precious": false, "readonly": false, + "text": "Group start", "type": "Unknown", - "precious": false + "value": "0" }, { - "text": "Arg01", - "name": "Arg01", - "value": "sleep 5", "defaultValue": "", "description": "The command line to be executed", + "name": "Arg01", + "precious": false, "readonly": false, + "text": "Arg01", "type": "Unknown", - "precious": false + "value": "5" + }, + { + "defaultValue": "sleep", + "description": "", + "name": "command", + "precious": false, + "readonly": false, + "text": "command", + "type": "String", + "value": "sleep" } ], - "applicationParams": [], + "flipPorts": false, + "git_url": "", + "height": 72, "inputAppFields": [], - "outputAppFields": [], - "exitAppFields": [], + "inputApplicationKey": null, "inputApplicationName": "", "inputApplicationType": "None", - "inputApplicationKey": null, - "outputApplicationName": "", - "outputApplicationType": "None", - "outputApplicationKey": null, - "exitApplicationName": "", - "exitApplicationType": "None", - "exitApplicationKey": null - }, - { - "category": "Memory", - "isData": true, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, - "color": "#2c2c2c", - "drawOrderHint": 0, - "key": -3, - "text": "Memory", - "description": "", - "x": 700.7892671316696, - "y": 593.7828552569915, - "width": 200, - "height": 72, - "collapsed": true, - "flipPorts": false, - "streaming": false, - "precious": false, - "subject": null, - "expanded": false, - "readonly": true, - "git_url": "", - "sha": "", - "group": -1, + "inputLocalPorts": [], "inputPorts": [ { - "Id": "ae981288-d839-4890-bb11-f54f336cfad0", + "Id": "b133f803-42f7-4c70-83b7-803bf8ec3035", "IdText": "event", - "text": "", + "description": "", "event": false, - "type": "", - "description": "" + "text": "", + "type": "" } ], + "isGroup": false, + "key": -2, + "outputAppFields": [], + "outputApplicationKey": null, + "outputApplicationName": "", + "outputApplicationType": "None", + "outputLocalPorts": [], "outputPorts": [ { - "Id": "b7c36b5e-dbe7-4d9d-ad2f-abec81071de5", + "Id": "1765f421-782c-4972-ab76-ac0c23bd95e3", "IdText": "event", - "text": "", + "description": "", "event": false, - "type": "", - "description": "" + "text": "", + "type": "" } ], - "inputLocalPorts": [], - "outputLocalPorts": [], + "precious": false, + "readonly": true, + "sha": "", + "streaming": false, + "subject": null, + "text": "SleepExternal", + "width": 200, + "x": 736.1530259962292, + "y": 855.7366246240957 + }, + { + "applicationArgs": [], + "category": "BashShellApp", + "collapsed": false, + "color": "#0059a5", + "description": "An application component run within the Bash Shell", + "drawOrderHint": 0, + "expanded": false, "fields": [ { - "text": "Data volume", - "name": "data_volume", - "value": "5", "defaultValue": "", "description": "", + "name": "execution_time", + "precious": false, "readonly": false, + "text": "Execution time", "type": "Unknown", - "precious": false + "value": "5" }, { - "text": "Group end", - "name": "group_end", - "value": "1", "defaultValue": "", "description": "", + "name": "num_cpus", + 
"precious": false, "readonly": false, + "text": "Num CPUs", "type": "Unknown", - "precious": false + "value": "1" + }, + { + "defaultValue": "false", + "description": "", + "name": "group_start", + "precious": false, + "readonly": false, + "text": "Group start", + "type": "Boolean", + "value": true + }, + { + "defaultValue": "", + "description": "The command line to be executed", + "name": "Arg01", + "precious": false, + "readonly": false, + "text": "Arg01", + "type": "Unknown", + "value": "3" + }, + { + "defaultValue": "sleep", + "description": "", + "name": "command", + "precious": false, + "readonly": false, + "text": "command", + "type": "String", + "value": "sleep" } ], - "applicationParams": [], - "inputAppFields": [], - "outputAppFields": [], - "exitAppFields": [], - "inputApplicationName": "", - "inputApplicationType": "None", - "inputApplicationKey": null, - "outputApplicationName": "", - "outputApplicationType": "None", - "outputApplicationKey": null, - "exitApplicationName": "", - "exitApplicationType": "None", - "exitApplicationKey": null - }, - { - "category": "BashShellApp", - "isData": false, - "isGroup": false, - "canHaveInputs": true, - "canHaveOutputs": true, - "color": "#0059a5", - "drawOrderHint": 0, - "key": -4, - "text": "SleepInternal", - "description": "An application component run within the Bash Shell", - "x": 692.9306540506565, - "y": 454.94735749242614, - "width": 200, - "height": 72, - "collapsed": false, "flipPorts": false, - "streaming": false, - "precious": false, - "subject": null, - "expanded": false, - "readonly": true, "git_url": "", - "sha": "", "group": -1, + "height": 72, + "inputAppFields": [], + "inputApplicationKey": null, + "inputApplicationName": "", + "inputApplicationType": "None", + "inputLocalPorts": [], "inputPorts": [ { "Id": "b133f803-42f7-4c70-83b7-803bf8ec3035", "IdText": "event", - "text": "", + "description": "", "event": false, - "type": "", - "description": "" + "text": "", + "type": "" } ], + "isGroup": false, + "key": -4, + "outputAppFields": [], + "outputApplicationKey": null, + "outputApplicationName": "", + "outputApplicationType": "None", + "outputLocalPorts": [], "outputPorts": [ { "Id": "1765f421-782c-4972-ab76-ac0c23bd95e3", "IdText": "event", - "text": "", + "description": "", "event": false, - "type": "", - "description": "" + "text": "", + "type": "" } ], - "inputLocalPorts": [], - "outputLocalPorts": [], + "precious": false, + "readonly": true, + "sha": "", + "streaming": false, + "subject": null, + "text": "SleepInternal", + "width": 200, + "x": 692.9306540506565, + "y": 454.94735749242614 + }, + { + "applicationArgs": [], + "category": "File", + "collapsed": true, + "color": "#2c2c2c", + "description": "A standard file on a filesystem mounted to the deployment machine", + "drawOrderHint": 0, + "expanded": false, "fields": [ { - "text": "Execution time", - "name": "execution_time", - "value": "5", - "defaultValue": "", - "description": "", + "defaultValue": "5", + "description": "Estimated size of the data contained in this node", + "name": "data_volume", + "precious": false, "readonly": false, - "type": "Unknown", - "precious": false + "text": "Data volume", + "type": "Float", + "value": 5 }, { - "text": "Num CPUs", - "name": "num_cpus", - "value": "1", - "defaultValue": "", - "description": "", + "defaultValue": "false", + "description": "Is this node the end of a group?", + "name": "group_end", + "precious": false, "readonly": false, - "type": "Unknown", - "precious": false + "text": "Group end", + "type": "Boolean", 
+ "value": true + }, + { + "defaultValue": "true", + "description": "Perform a check to make sure the file path exists before proceeding with the application", + "name": "check_filepath_exists", + "precious": false, + "readonly": false, + "text": "Check file path exists", + "type": "Boolean", + "value": true }, { - "text": "Group start", - "name": "group_start", - "value": "1", "defaultValue": "", - "description": "", + "description": "Path to the file for this node", + "name": "filepath", + "precious": false, "readonly": false, - "type": "Unknown", - "precious": false + "text": "File path", + "type": "String", + "value": "" }, { - "text": "Arg01", - "name": "Arg01", - "value": "sleep 3", "defaultValue": "", - "description": "The command line to be executed", + "description": "Name of the directory containing the file for this node", + "name": "dirname", + "precious": false, "readonly": false, - "type": "Unknown", - "precious": false + "text": "Directory name", + "type": "String", + "value": "" } ], - "applicationParams": [], + "flipPorts": false, + "git_url": "", + "group": -1, + "height": 72, "inputAppFields": [], - "outputAppFields": [], - "exitAppFields": [], + "inputApplicationKey": null, "inputApplicationName": "", "inputApplicationType": "None", - "inputApplicationKey": null, + "inputLocalPorts": [], + "inputPorts": [ + { + "Id": "102d6fc6-eea7-434b-9196-65a42a79cf13", + "IdText": "event", + "description": "", + "event": false, + "text": "event", + "type": "event" + } + ], + "isGroup": false, + "key": -3, + "outputAppFields": [], + "outputApplicationKey": null, "outputApplicationName": "", "outputApplicationType": "None", - "outputApplicationKey": null, - "exitApplicationName": "", - "exitApplicationType": "None", - "exitApplicationKey": null - } - ], - "linkDataArray": [ - { - "from": -4, - "fromPort": "1765f421-782c-4972-ab76-ac0c23bd95e3", - "to": -3, - "toPort": "ae981288-d839-4890-bb11-f54f336cfad0", - "loop_aware": "0" - }, - { - "from": -3, - "fromPort": "b7c36b5e-dbe7-4d9d-ad2f-abec81071de5", - "to": -2, - "toPort": "b133f803-42f7-4c70-83b7-803bf8ec3035", - "loop_aware": "1" + "outputLocalPorts": [], + "outputPorts": [ + { + "Id": "b8739916-45c0-41e1-b38d-afede242bcfd", + "IdText": "event", + "description": "", + "event": false, + "text": "event", + "type": "event" + } + ], + "precious": false, + "readonly": true, + "sha": "", + "streaming": false, + "subject": null, + "text": "event", + "width": 200, + "x": 723.5418400234428, + "y": 594.3419910582609 } ] } \ No newline at end of file diff --git a/daliuge-translator/test/dropmake/logical_graphs/test_grpby_gather.graph b/daliuge-translator/test/dropmake/logical_graphs/test_grpby_gather.graph index f1d25a7bc..d69c19e6c 100644 --- a/daliuge-translator/test/dropmake/logical_graphs/test_grpby_gather.graph +++ b/daliuge-translator/test/dropmake/logical_graphs/test_grpby_gather.graph @@ -54,7 +54,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -117,7 +117,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -191,7 +191,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -266,7 +266,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], 
"exitAppFields": [], @@ -447,7 +447,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -520,7 +520,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -617,7 +617,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -781,7 +781,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -945,7 +945,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1009,7 +1009,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1082,7 +1082,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1134,7 +1134,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1289,7 +1289,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1362,7 +1362,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1426,7 +1426,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1487,7 +1487,7 @@ "inputLocalPorts": [], "outputLocalPorts": [], "fields": [], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1541,7 +1541,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1595,7 +1595,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1769,7 +1769,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1823,7 +1823,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], @@ -1877,7 +1877,7 @@ "precious": false } ], - "applicationParams": [], + "applicationArgs": [], "inputAppFields": [], "outputAppFields": [], "exitAppFields": [], diff --git a/docs/architecture/graphs.rst b/docs/architecture/graphs.rst index bcfc9169e..4125fdf30 100644 --- a/docs/architecture/graphs.rst +++ b/docs/architecture/graphs.rst @@ -258,6 +258,22 @@ Shrunk memory will be truncated, grown blocks will contain a copy of the old dat As mentioned previously, if DALiuGE is configured to utilise multiple cores, there is no need to specifically use SharedMemoryDROPs, InMemoryDROPs will be switched automatically. However, if the need arises, one can specifically use SharedMemoryDROPs. 
+Environment Variables
+^^^^^^^^^^^^^^^^^^^^^
+Often, several workflow components rely on shared global configuration values, usually stored in
+imaginatively named configuration files.
+DALiuGE supports this approach, of course, but offers additional, more transparent options.
+The ``EnvironmentVarDROP`` is a simple key-value store accessible at runtime by all drops in a workflow.
+One can include multiple ``EnvironmentVarDROP`` stores in a single workflow, **but each variable store must have a unique name**.
+In a logical graph, reference environment variables as component or application parameters with the following syntax:
+``${EnvironmentVarDROP_Name}.{Variable_name}``; for example, ``${global_config}.frequency`` refers to the variable ``frequency`` in the store named ``global_config``.
+The translator and engine handle parsing and filling of these parameters automatically.
+Variables beginning with ``$DLG_``, such as ``$DLG_ROOT``, are an exception and are handled separately.
+These variables are defined by the deployment itself and are fetched from the deployment environment at runtime.
+
+One may also access these variables individually at runtime using the ``get_environment_variable(key)`` function, which accepts a key in the syntax mentioned above and returns ``None`` if the variable store or key does not exist.
+
+
 .. |lgt| replace:: *logical graph template*
 .. |lg| replace:: *logical graph*
 .. |pgt| replace:: *physical graph template*
diff --git a/docs/deployment.rst b/docs/deployment.rst
index 43eb4af9f..8688d21fe 100644
--- a/docs/deployment.rst
+++ b/docs/deployment.rst
@@ -66,6 +66,15 @@ Deployment with OpenOnDemand
 `OpenOnDemand `_ (OOD) is a system providing an interactive interface to remote compute resources. It is becoming increasingly popular with a number of HPC centers around the world. The two Australian research HPC centers Pawsey and NCI are planning to roll it out for their users. Independently we had realized that |daliuge| is missing a authentication, authorization and session management system and started looking into OOD as a solution for this. After a short evaluation we have started integrating OOD into the deployment for our small in-house compute cluster. In order to make this work we needed to implement an additional interface between the translator running on an external server (e.g. AWS) and OOD and then further on into the (SLURM) batch job system. This interface code is currently in a separate private git repository, but will be released as soon as we have finished testing it. The code mimics the |daliuge| data island manager's REST interface, but instead of launching the workflow directly it prepares a SLURM job submission script and places it into the queue. Users can then use the standard OOD web-pages to monitor the jobs and get access to the logs and results of the workflow execution. OOD allows the integration of multiple compute resources, including Kubernetes and also (to a certain degree) GCP, AWS and Azure. Once configured, users can choose to submit their jobs to any of those. Our OOD interface code has been implemented as an OOD embedded `Phusion Passenger `_ `Flask `_ application, which is `WSGI `_ compliant. Very little inside that application is OOD specific and can thus be easily ported to other deployment scenarios.
 
+Deployment with Kubernetes (Coming Soon)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Kubernetes is the canonical container orchestration system.
+We are building support to deploy workflows as helm charts, which will enable easier and more reliable deployments across a wider range of computing facilities.
+Support is currently limited, but watch this space.
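+
+As a purely illustrative sketch of the intended usage (the release name and chart location below are hypothetical placeholders; no chart has been published yet), a deployment would follow the standard helm workflow:
+
+.. code-block:: none
+
+   # hypothetical chart location, not yet released
+   helm install daliuge-workflow ./charts/daliuge-workflow
+   # then monitor the engine pods with the usual Kubernetes tooling
+   kubectl get pods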
+
+
 Component Deployment
 ====================
 
@@ -89,4 +98,10 @@ Python components
 Components written in Python provide direct access to the whole |daliuge| engine runtime. They can use direct remote procedure calls and memory sharing even across multiple compute nodes. By default the engine is configured to use the multiprocessing module to launch the *application code* of the components using a maximum number of processes equal to the number of physical cores available on the computer. If there are more components than cores, then they are executed in serial. More advanced Python components, which are not restricted by the Python Global Interpreter Lock (GIL) don't really need this mechanism. Memory data components will automatically switch to use shared memory blocks between those processes. Note that the *component code* will still run in a single process together with the node manager. In the future, in order to minimize side effects, we might entirely switch to using separate processes for the execution of application code.
 
-In order to be able to use Python components, it must be possible for the engine to import the code and thus it must be accessible on the PYTHONPATH at runtime. By default the engine is configured to add the directory $DLG_ROOT/code to the PYTHONPATH and thus users can install their code there. In the case of running |daliuge| in docker containers $DLG_ROOT is mounted from the host and thus also the subdirectory code is modifyable directly on the host. In a typical HPC deployment scenario that directory will be on the user's home directory, or a shared volume, visible to all compute nodes.
\ No newline at end of file
+In order to be able to use Python components, it must be possible for the engine to import the code and thus it must be accessible on the PYTHONPATH at runtime. By default the engine is configured to add the directory $DLG_ROOT/code to the PYTHONPATH and thus users can install their code there using a command like:
+
+.. code-block:: none
+
+   docker exec -ti daliuge-engine bash -c "pip install --prefix=\$DLG_ROOT/code dlg_example_cmpts"
+
+Please note that the '\' character is required so that $DLG_ROOT is expanded inside the container rather than by the host shell. In the case of running |daliuge| in docker containers, $DLG_ROOT is mounted from the host and thus the code subdirectory is also visible directly on the host. In a typical HPC deployment scenario that directory will be on the user's home directory, or a shared volume, visible to all compute nodes.
\ No newline at end of file
diff --git a/docs/development/app_development/bash_components.rst b/docs/development/app_development/bash_components.rst
index 9ba8d05ec..1358d0752 100644
--- a/docs/development/app_development/bash_components.rst
+++ b/docs/development/app_development/bash_components.rst
@@ -60,6 +60,26 @@ In addition to the session log file the same information is also contained in the
 When you now deploy the graph again and watch the terminal output, you will see a lot of messages pass through.
 
+Treatment of Command Line Parameters
+------------------------------------
+|daliuge| has multiple ways to pass command line parameters to a bash command. The same feature is also used for Docker, Singularity and MPI components. To facilitate this, |daliuge| uses a few reserved component parameters (a worked example is given at the end of this section):
+
+* command: The value is used as the command (utility) to be executed. It is possible to specify more complex command lines in this value, but the end-user needs to know the syntax.
+* command_line_arguments: Similar to command, but the value only contains any additional arguments. Again, the end-user needs to know all the details.
+* Arg01 - Arg10: (deprecated: use applicationArgs instead; only kept for backwards compatibility)
+* applicationArgs: This is serialized as a separate dictionary in JSON, and every argument is one entry whose key is the parameter name. This is the most recent way of defining and passing arguments on the command line and allows the developer to define every single argument in detail, including some description text, default value, write protection and type specification/verification. EAGLE displays the complete set and allows users to specify and modify the content. |daliuge| will process and concatenate all of the parameters and attach them to the command line. To enable the user/developer to control the behaviour of this processing there are two additional parameters:
+* argumentPrefix: The value is prepended to each of the parameter names of the applicationArgs. If not specified, the default is '--'. In addition, if argumentPrefix=='--' and an argument name is only a single character long, the argumentPrefix will be changed to '-'. This allows the construction of POSIX compliant option arguments as well as short argument names.
+* paramValueSeparator: The value is used to concatenate the argument name and the value. The default is ' ' (space). Some utilities use a syntax like 'arg=value'; in that case this parameter can be set accordingly.
+* input_redirection: The value will be prepended to the command line (cmd) using 'cat {value} > '.
+* output_redirection: The value will be appended to the command line (cmd) using '> {value}'.
+
+Not all of them need to be present in a component, only the ones the component developer wants to offer to the user. In particular the applicationArgs have been introduced to support complex utilities which can feature more than 100 arguments (think of tar). If more than one way of specifying arguments is available to an end-user, they can be used together, but the order in which these parts are concatenated might produce unwanted results. The final command line is constructed in the following way (not including the deprecated ArgXX parameters)::
+
+ cat {input_redirection.value} > {command.value} {argumentPrefix.value}{applicationArgs.name}{paramValueSeparator.value}{applicationArgs.value} {command_line_arguments.value} > {output_redirection.value}
+
+The applicationArgs are treated in the order of appearance. After the construction of the command line, any placeholder strings will be replaced with actual values. In particular, strings of the form '%iX' (where X is the index number of an input of this component) will be replaced with the URL of the input with that index (counting from 0). Similarly, '%oX' will be replaced with the respective output URLs.
+
+Eventually we will also drop support for the command_line_arguments parameter. However, applicationArgs currently can't be used to specify positional arguments (just a value), so as a fallback users can still use command_line_arguments to achieve that. It should also be noted that for really simple commands, like the one used in the helloWorld example, users can simply specify everything directly in the command parameter and omit all of the others.
+
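+As a purely illustrative worked example (the utility and all values below are made up for this sketch), assume a component with command 'wget', two applicationArgs 'tries' (value '5') and 'T' (value '10'), the default argumentPrefix '--' and paramValueSeparator ' ', and output_redirection 'page.html'. Following the construction rule above, the resulting command line would be::
+
+ wget --tries 5 -T 10 > page.html
+
+Note how the single-character argument name 'T' automatically received the short prefix '-', as described for argumentPrefix above.
+
 .. 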
_advanced_bash: Advanced Bash Components diff --git a/lalo/A b/lalo/A new file mode 100644 index 000000000..4e4020211 --- /dev/null +++ b/lalo/A @@ -0,0 +1 @@ +SGE]w3 \ No newline at end of file diff --git a/lalo/C b/lalo/C new file mode 100644 index 000000000..4e4020211 --- /dev/null +++ b/lalo/C @@ -0,0 +1 @@ +SGE]w3 \ No newline at end of file diff --git a/tools/xml2palette/xml2palette.py b/tools/xml2palette/xml2palette.py index 304a67563..a790ea261 100644 --- a/tools/xml2palette/xml2palette.py +++ b/tools/xml2palette/xml2palette.py @@ -32,28 +32,31 @@ ] -def get_filenames_from_command_line(argv): +def get_options_from_command_line(argv): inputdir = "" + tag = "" outputfile = "" try: - opts, args = getopt.getopt(argv, "hi:o:", ["idir=", "ofile="]) + opts, args = getopt.getopt(argv, "hi:t:o:", ["idir=", "tag=", "ofile="]) except getopt.GetoptError: - print("xml2palette.py -i -o ") + print("xml2palette.py -i -t -o ") sys.exit(2) if len(opts) < 2: - print("xml2palette.py -i -o ") + print("xml2palette.py -i -t -o ") sys.exit() for opt, arg in opts: if opt == "-h": - print("xml2palette.py -i -o ") + print("xml2palette.py -i -t -o ") sys.exit() elif opt in ("-i", "--idir"): inputdir = arg + elif opt in ("-t", "--tag"): + tag = arg elif opt in ("-o", "--ofile"): outputfile = arg - return inputdir, outputfile + return inputdir, tag, outputfile def check_environment_variables(): @@ -145,7 +148,7 @@ def find_field_by_name(fields, name): def add_required_fields_for_category(fields, category): - if category == "DynlibApp": + if category in ["DynlibApp", "PythonApp", "Branch", "BashShellApp", "Mpi", "Docker"]: if find_field_by_name(fields, "execution_time") is None: fields.append( create_field( @@ -170,6 +173,8 @@ def add_required_fields_for_category(fields, category): False, ) ) + + if category in ["DynlibApp", "PythonApp", "Branch", "BashShellApp", "Docker"]: if find_field_by_name(fields, "group_start") is None: fields.append( create_field( @@ -182,61 +187,52 @@ def add_required_fields_for_category(fields, category): False, ) ) + if category == "DynlibApp": if find_field_by_name(fields, "libpath") is None: fields.append( create_field( "libpath", "Library path", "", "", "readwrite", "String", False ) ) - elif category == "PythonApp": - if find_field_by_name(fields, "execution_time") is None: + if category in ["PythonApp", "Branch"]: + if find_field_by_name(fields, "appclass") is None: fields.append( create_field( - "execution_time", - "Execution time", - 5, - "Estimated execution time", + "appclass", + "Appclass", + "dlg.apps.simple.SleepApp", + "Application class", "readwrite", - "Float", + "String", False, ) ) - if find_field_by_name(fields, "num_cpus") is None: + if category in ["File", "Memory", "NGAS", "ParameterSet", "Plasma", "PlasmaFlight", "S3"]: + if find_field_by_name(fields, "data_volume") is None: fields.append( create_field( - "num_cpus", - "Num CPUs", - 1, - "Number of cores used", + "data_volume", + "Data volume", + 5, + "Estimated size of the data contained in this node", "readwrite", "Integer", False, ) ) - if find_field_by_name(fields, "group_start") is None: + if category in ["File", "Memory", "NGAS", "ParameterSet", "Plasma", "PlasmaFlight", "S3", "Mpi"]: + if find_field_by_name(fields, "group_end") is None: fields.append( create_field( - "group_start", - "Group start", + "group_end", + "Group end", "false", - "Component is start of a group", + "Component is end of a group", "readwrite", "Boolean", False, ) ) - if find_field_by_name(fields, "appclass") is None: - fields.append( 
- create_field( - "appclass", - "Appclass", - "dlg.apps.simple.SleepApp", - "Application class", - "readwrite", - "String", - False, - ) - ) def create_field(internal_name, name, value, description, access, type, precious): @@ -368,6 +364,7 @@ def create_palette_node_from_params(params): text = "" description = "" category = "" + tag = "" categoryType = "" inputPorts = [] outputPorts = [] @@ -385,6 +382,8 @@ def create_palette_node_from_params(params): if key == "category": category = value + elif key == "tag": + tag = value elif key == "text": text = value elif key == "description": @@ -468,9 +467,10 @@ def create_palette_node_from_params(params): add_required_fields_for_category(fields, category) # create and return the node + # TODO: we can remove a bunch of these attributes (isData etc) return { "category": category, - "categoryType": "Application", + "tag": tag, "isData": False, "isGroup": False, "canHaveInputs": True, @@ -634,7 +634,7 @@ def process_compounddef(compounddef): if __name__ == "__main__": logging.basicConfig(format="%(asctime)s - %(message)s", datefmt="%d-%b-%y %H:%M:%S") - (inputdir, outputfile) = get_filenames_from_command_line(sys.argv[1:]) + (inputdir, tag, outputfile) = get_options_from_command_line(sys.argv[1:]) # create a temp directory for the output of doxygen output_directory = tempfile.TemporaryDirectory() @@ -706,7 +706,10 @@ def process_compounddef(compounddef): # create a node n = create_palette_node_from_params(params) - nodes.append(n) + + # if the node tag matches the command line tag, or no tag was specified on the command line, add the node to the list to output + if n["tag"] == tag or tag == "": + nodes.append(n) # check if gitrepo and version params were found and cache the values for param in params: