From 4e022888343c88f3b1a7498f56364b637c2e8783 Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 16:02:39 +0800 Subject: [PATCH 1/7] Removed second copy of Plasma and PlasmaFlight doxygen, added missing tags and updated param format. Modified xml2palette to not add missing params, too opaque. --- daliuge-engine/dlg/drop.py | 57 +++-------- tools/xml2palette/xml2palette.py | 167 +++---------------------------- 2 files changed, 28 insertions(+), 196 deletions(-) diff --git a/daliuge-engine/dlg/drop.py b/daliuge-engine/dlg/drop.py index fc520bd21..c4081beb3 100644 --- a/daliuge-engine/dlg/drop.py +++ b/daliuge-engine/dlg/drop.py @@ -2119,15 +2119,16 @@ def exists(self): # @details An object in a Apache Arrow Plasma in-memory object store # @par EAGLE_START # @par category Plasma -# @param[in] param/data_volume Data volume/5/Float/readwrite/ +# @param tag template +# @param[in] param/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node -# @param[in] param/group_end Group end/False/Boolean/readwrite/ +# @param[in] param/group_end Group end/False/Boolean/readwrite/False//False/ # \~English Is this node the end of a group? 
-# @param[in] param/plasma_path Plasma Path//String/readwrite/ +# @param[in] param/plasma_path Plasma Path//String/readwrite/False//False/ # \~English Path to the local plasma store -# @param[in] param/object_id Object Id//String/readwrite/ +# @param[in] param/object_id Object Id//String/readwrite/False//False/ # \~English PlasmaId of the object for all compute nodes -# @param[in] param/use_staging Use Staging/False/Boolean/readwrite/ +# @param[in] param/use_staging Use Staging/False/Boolean/readwrite/False//False/ # \~English Enables writing to a dynamically resizeable staging buffer # @par EAGLE_END class PlasmaDROP(DataDROP): @@ -2163,15 +2164,16 @@ def dataURL(self): # to a Plasma in-memory object store # @par EAGLE_START # @par category Plasma -# @param[in] param/data_volume Data volume/5/Float/readwrite/ +# @param tag template +# @param[in] param/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node -# @param[in] param/group_end Group end/False/Boolean/readwrite/ +# @param[in] param/group_end Group end/False/Boolean/readwrite/False//False/ # \~English Is this node the end of a group? 
-# @param[in] param/plasma_path Plasma Path//String/readwrite/ +# @param[in] param/plasma_path Plasma Path//String/readwrite/False//False/ # \~English Path to the local plasma store -# @param[in] param/object_id Object Id//String/readwrite/ +# @param[in] param/object_id Object Id//String/readwrite/False//False/ # \~English PlasmaId of the object for all compute nodes -# @param[in] param/flight_path Flight Path//String/readwrite/ +# @param[in] param/flight_path Flight Path//String/readwrite/False//False/ # \~English IP and flight port of the drop owner # @par EAGLE_END class PlasmaFlightDROP(DataDROP): @@ -2647,23 +2649,6 @@ def execute(self, _send_notifications=True): self._notifyAppIsFinished() -## -# @brief Plasma -# @details An object in a Apache Arrow Plasma in-memory object store -# @par EAGLE_START -# @param category Plasma -# @param tag template -# @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ -# \~English Estimated size of the data contained in this node -# @param[in] cparam/group_end Group end/False/Boolean/readwrite/False//False/ -# \~English Is this node the end of a group? 
-# @param[in] cparam/plasma_path Plasma Path//String/readwrite/False//False/ -# \~English Path to the local plasma store -# @param[in] cparam/object_id Object Id//String/readwrite/False//False/ -# \~English PlasmaId of the object for all compute nodes -# @param[in] cparam/use_staging Use Staging/False/Boolean/readwrite/False//False/ -# \~English Enables writing to a dynamically resizeable staging buffer -# @par EAGLE_END class PlasmaDROP(AbstractDROP): """ A DROP that points to data stored in a Plasma Store @@ -2691,24 +2676,6 @@ def dataURL(self): return "plasma://%s" % (binascii.hexlify(self.object_id).decode("ascii")) -## -# @brief PlasmaFlight -# @details An Apache Arrow Flight server providing distributed access -# to a Plasma in-memory object store -# @par EAGLE_START -# @param category PlasmaFlight -# @param tag template -# @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ -# \~English Estimated size of the data contained in this node -# @param[in] cparam/group_end Group end/False/Boolean/readwrite/False//False/ -# \~English Is this node the end of a group? 
-# @param[in] cparam/plasma_path Plasma Path//String/readwrite/False//False/ -# \~English Path to the local plasma store -# @param[in] cparam/object_id Object Id//String/readwrite/False//False/ -# \~English PlasmaId of the object for all compute nodes -# @param[in] cparam/flight_path Flight Path//String/readwrite/False//False/ -# \~English IP and flight port of the drop owner -# @par EAGLE_END class PlasmaFlightDROP(AbstractDROP): """ A DROP that points to data stored in a Plasma Store diff --git a/tools/xml2palette/xml2palette.py b/tools/xml2palette/xml2palette.py index b71e1c6d4..5f87d574e 100644 --- a/tools/xml2palette/xml2palette.py +++ b/tools/xml2palette/xml2palette.py @@ -150,164 +150,32 @@ def find_field_by_name(fields, name): return None -def add_required_fields_for_category(text, fields, category): +def check_required_fields_for_category(text, fields, category): if category in ["DynlibApp", "PythonApp", "Branch", "BashShellApp", "Mpi", "Docker"]: - add_field_if_missing( - text, - fields, - "execution_time", - "Execution time", - 5, - "Estimated execution time", - "readwrite", - "Float", - False, - [], - False, - ) - add_field_if_missing( - text, - fields, - "num_cpus", - "Num CPUs", - 1, - "Number of cores used", - "readwrite", - "Integer", - False, - [], - False, - ) + alert_if_missing(text, "execution_time") + alert_if_missing(text, "num_cpus") if category in ["DynlibApp", "PythonApp", "Branch", "BashShellApp", "Docker"]: - add_field_if_missing( - text, - fields, - "group_start", - "Group start", - "false", - "Component is start of a group", - "readwrite", - "Boolean", - False, - [], - False, - ) + alert_if_missing(text, "group_start") if category == "DynlibApp": - add_field_if_missing(text, fields, "libpath", "Library path", "", "", "readwrite", "String", False, [], False) + alert_if_missing(text, "libpath") if category in ["PythonApp", "Branch"]: - add_field_if_missing( - text, - fields, - "appclass", - "Appclass", - "dlg.apps.simple.SleepApp", - 
"Application class", - "readwrite", - "String", - False, - [], - False, - ) + alert_if_missing(text, "appclass") if category in ["File", "Memory", "NGAS", "ParameterSet", "Plasma", "PlasmaFlight", "S3"]: - add_field_if_missing( - text, - fields, - "data_volume", - "Data volume", - 5, - "Estimated size of the data contained in this node", - "readwrite", - "Integer", - False, - [], - False, - ) + alert_if_missing(text, "data_volume") if category in ["File", "Memory", "NGAS", "ParameterSet", "Plasma", "PlasmaFlight", "S3", "Mpi"]: - add_field_if_missing( - text, - fields, - "group_end", - "Group end", - "false", - "Component is end of a group", - "readwrite", - "Boolean", - False, - [], - False, - ) + alert_if_missing(text, "group_end") if category in ["BashShellApp", "Mpi", "Docker", "Singularity"]: - add_field_if_missing( - text, - fields, - "input_redirection", - "Input redirection", - "", - "The command line argument that specifies the input into this application", - "readwrite", - "String", - False, - [], - False, - ) - add_field_if_missing( - text, - fields, - "output_redirection", - "Output redirection", - "", - "The command line argument that specifies the output from this application", - "readwrite", - "String", - False, - [], - False, - ) - add_field_if_missing( - text, - fields, - "command_line_arguments", - "Command line arguments", - "", - "Additional command line arguments to be added to the command line to be executed", - "readwrite", - "String", - False, - [], - False, - ) - add_field_if_missing( - text, - fields, - "paramValueSeparator", - "Param Value Separator", - " ", - "Separator character(s) between parameters on the command line", - "readwrite", - "String", - False, - [], - False, - ) - add_field_if_missing( - text, - fields, - "argumentPrefix", - "Argument prefix", - "--", - "Prefix to each keyed argument on the command line", - "readwrite", - "String", - False, - [], - False, - ) + alert_if_missing(text, "input_redirection") + 
alert_if_missing(text, "output_redirection") + alert_if_missing(text, "command_line_arguments") + alert_if_missing(text, "paramValueSeparator") + alert_if_missing(text, "argumentPrefix") def create_field(internal_name, name, value, description, access, type, precious, options, positional): @@ -325,14 +193,11 @@ def create_field(internal_name, name, value, description, access, type, precious } -def add_field_if_missing(text, fields, internal_name, name, value, description, access, type, precious, options, positional): +def alert_if_missing(text, internal_name): if find_field_by_name(fields, internal_name) is None: logging.warning( text + " component added missing " + internal_name + " cparam" ) - fields.append( - create_field(internal_name, name, value, description, access, type, precious, options, positional) - ) def parse_key(key): @@ -662,8 +527,8 @@ def create_palette_node_from_params(params): else: logging.warning("Unknown port direction: " + direction) - # add extra fields that must be included for the category - add_required_fields_for_category(text, fields, category) + # check for presence of extra fields that must be included for each category + check_required_fields_for_category(text, fields, category) # create and return the node return ({ From 3cd8bd7ba4b4f104337257e3c8f5f3329df7bdfc Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 16:10:59 +0800 Subject: [PATCH 2/7] Fixed bug in xml2palette --- tools/xml2palette/xml2palette.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/xml2palette/xml2palette.py b/tools/xml2palette/xml2palette.py index 5f87d574e..426cb837a 100644 --- a/tools/xml2palette/xml2palette.py +++ b/tools/xml2palette/xml2palette.py @@ -152,30 +152,30 @@ def find_field_by_name(fields, name): def check_required_fields_for_category(text, fields, category): if category in ["DynlibApp", "PythonApp", "Branch", "BashShellApp", "Mpi", "Docker"]: - alert_if_missing(text, 
"execution_time") - alert_if_missing(text, "num_cpus") + alert_if_missing(text, fields, "execution_time") + alert_if_missing(text, fields, "num_cpus") if category in ["DynlibApp", "PythonApp", "Branch", "BashShellApp", "Docker"]: - alert_if_missing(text, "group_start") + alert_if_missing(text, fields, "group_start") if category == "DynlibApp": - alert_if_missing(text, "libpath") + alert_if_missing(text, fields, "libpath") if category in ["PythonApp", "Branch"]: - alert_if_missing(text, "appclass") + alert_if_missing(text, fields, "appclass") if category in ["File", "Memory", "NGAS", "ParameterSet", "Plasma", "PlasmaFlight", "S3"]: - alert_if_missing(text, "data_volume") + alert_if_missing(text, fields, "data_volume") if category in ["File", "Memory", "NGAS", "ParameterSet", "Plasma", "PlasmaFlight", "S3", "Mpi"]: - alert_if_missing(text, "group_end") + alert_if_missing(text, fields, "group_end") if category in ["BashShellApp", "Mpi", "Docker", "Singularity"]: - alert_if_missing(text, "input_redirection") - alert_if_missing(text, "output_redirection") - alert_if_missing(text, "command_line_arguments") - alert_if_missing(text, "paramValueSeparator") - alert_if_missing(text, "argumentPrefix") + alert_if_missing(text, fields, "input_redirection") + alert_if_missing(text, fields, "output_redirection") + alert_if_missing(text, fields, "command_line_arguments") + alert_if_missing(text, fields, "paramValueSeparator") + alert_if_missing(text, fields, "argumentPrefix") def create_field(internal_name, name, value, description, access, type, precious, options, positional): @@ -193,11 +193,9 @@ def create_field(internal_name, name, value, description, access, type, precious } -def alert_if_missing(text, internal_name): +def alert_if_missing(text, fields, internal_name): if find_field_by_name(fields, internal_name) is None: - logging.warning( - text + " component added missing " + internal_name + " cparam" - ) + logging.warning(text + " component missing " + internal_name + " 
cparam") def parse_key(key): From 202f83a70bb88c149dc7b1649841871a8d64018f Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 16:28:00 +0800 Subject: [PATCH 3/7] Check that data nodes are not assigned aparams, those are unsuitable. Only add construct nodes if the tag matches. --- daliuge-engine/dlg/drop.py | 20 ++++++++++---------- tools/xml2palette/xml2palette.py | 18 ++++++++++++------ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/daliuge-engine/dlg/drop.py b/daliuge-engine/dlg/drop.py index c4081beb3..14eaf5d62 100644 --- a/daliuge-engine/dlg/drop.py +++ b/daliuge-engine/dlg/drop.py @@ -2120,15 +2120,15 @@ def exists(self): # @par EAGLE_START # @par category Plasma # @param tag template -# @param[in] param/data_volume Data volume/5/Float/readwrite/False//False/ +# @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node -# @param[in] param/group_end Group end/False/Boolean/readwrite/False//False/ +# @param[in] cparam/group_end Group end/False/Boolean/readwrite/False//False/ # \~English Is this node the end of a group? 
-# @param[in] param/plasma_path Plasma Path//String/readwrite/False//False/ +# @param[in] cparam/plasma_path Plasma Path//String/readwrite/False//False/ # \~English Path to the local plasma store -# @param[in] param/object_id Object Id//String/readwrite/False//False/ +# @param[in] cparam/object_id Object Id//String/readwrite/False//False/ # \~English PlasmaId of the object for all compute nodes -# @param[in] param/use_staging Use Staging/False/Boolean/readwrite/False//False/ +# @param[in] cparam/use_staging Use Staging/False/Boolean/readwrite/False//False/ # \~English Enables writing to a dynamically resizeable staging buffer # @par EAGLE_END class PlasmaDROP(DataDROP): @@ -2165,15 +2165,15 @@ def dataURL(self): # @par EAGLE_START # @par category Plasma # @param tag template -# @param[in] param/data_volume Data volume/5/Float/readwrite/False//False/ +# @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node -# @param[in] param/group_end Group end/False/Boolean/readwrite/False//False/ +# @param[in] cparam/group_end Group end/False/Boolean/readwrite/False//False/ # \~English Is this node the end of a group? 
-# @param[in] param/plasma_path Plasma Path//String/readwrite/False//False/ +# @param[in] cparam/plasma_path Plasma Path//String/readwrite/False//False/ # \~English Path to the local plasma store -# @param[in] param/object_id Object Id//String/readwrite/False//False/ +# @param[in] cparam/object_id Object Id//String/readwrite/False//False/ # \~English PlasmaId of the object for all compute nodes -# @param[in] param/flight_path Flight Path//String/readwrite/False//False/ +# @param[in] cparam/flight_path Flight Path//String/readwrite/False//False/ # \~English IP and flight port of the drop owner # @par EAGLE_END class PlasmaFlightDROP(DataDROP): diff --git a/tools/xml2palette/xml2palette.py b/tools/xml2palette/xml2palette.py index 426cb837a..f649e502f 100644 --- a/tools/xml2palette/xml2palette.py +++ b/tools/xml2palette/xml2palette.py @@ -33,6 +33,7 @@ KNOWN_PARAM_DATA_TYPES = ["String", "Integer", "Float", "Complex", "Boolean", "Select", "Password", "Json"] KNOWN_CONSTRUCT_TYPES = ["Scatter", "Gather"] +KNOWN_DATA_CATEGORIES = ["File", "Memory", "SharedMemory", "NGAS", "ParameterSet", "S3", "Plasma", "PlasmaFlight"] def get_options_from_command_line(argv): @@ -345,7 +346,6 @@ def create_palette_node_from_params(params): category = "" tag = "" construct = "" - categoryType = "" inputPorts = [] outputPorts = [] inputLocalPorts = [] @@ -441,6 +441,12 @@ def create_palette_node_from_params(params): text + " aparam '" + name + "' has unknown type: " + type ) + # check that category if suitable for aparams + if category in KNOWN_DATA_CATEGORIES: + logging.warning( + text + " has aparam, which is not suitable for a " + category + " node" + ) + # check that a param of type "Select" has some options specified, # and check that every param with some options specified is of type "Select" if type == "Select" and len(options) == 0: @@ -815,11 +821,11 @@ def create_construct_node(type, node): logging.info("Adding component: " + node["text"]) nodes.append(node) - # if a construct 
is found, add to nodes - if data["construct"] != "": - logging.info("Adding component: " + data["construct"] + "/" + node["text"]) - construct_node = create_construct_node(data["construct"], node) - nodes.append(construct_node) + # if a construct is found, add to nodes + if data["construct"] != "": + logging.info("Adding component: " + data["construct"] + "/" + node["text"]) + construct_node = create_construct_node(data["construct"], node) + nodes.append(construct_node) # check if gitrepo and version params were found and cache the values for param in params: From 2592dbb5c58d30dc41a442bede794d373403e30e Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 16:45:50 +0800 Subject: [PATCH 4/7] Use param instead of par for Plasma categories --- daliuge-engine/dlg/drop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daliuge-engine/dlg/drop.py b/daliuge-engine/dlg/drop.py index 14eaf5d62..049b5717a 100644 --- a/daliuge-engine/dlg/drop.py +++ b/daliuge-engine/dlg/drop.py @@ -2118,7 +2118,7 @@ def exists(self): # @brief Plasma # @details An object in a Apache Arrow Plasma in-memory object store # @par EAGLE_START -# @par category Plasma +# @param category Plasma # @param tag template # @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node @@ -2163,7 +2163,7 @@ def dataURL(self): # @details An Apache Arrow Flight server providing distributed access # to a Plasma in-memory object store # @par EAGLE_START -# @par category Plasma +# @param category Plasma # @param tag template # @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node From 3ec7cc19b97d7353ebfaf72d6cb49e66ee62956e Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 16:52:27 +0800 Subject: [PATCH 5/7] Switched two ParameterSet aparams to cparams --- daliuge-engine/dlg/parset_drop.py | 4 ++-- 
1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daliuge-engine/dlg/parset_drop.py b/daliuge-engine/dlg/parset_drop.py index 3280bdb8e..8b382d6c0 100644 --- a/daliuge-engine/dlg/parset_drop.py +++ b/daliuge-engine/dlg/parset_drop.py @@ -36,8 +36,8 @@ # @param tag template # @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/Estimated size of the data contained in this node # @param[in] cparam/group_end Group end/False/Boolean/readwrite/False//False/Is this node the end of a group? -# @param[in] aparam/mode Parset mode/"YANDA"/String/readonly/False//False/To what standard DALiuGE should filter and serialize the parameters. -# @param[in] aparam/config_data ConfigData/""/String/readwrite/False//False/Additional configuration information to be mixed in with the initial data +# @param[in] cparam/mode Parset mode/"YANDA"/String/readonly/False//False/To what standard DALiuGE should filter and serialize the parameters. +# @param[in] cparam/config_data ConfigData/""/String/readwrite/False//False/Additional configuration information to be mixed in with the initial data # @param[out] port/Config ConfigFile/File/The output configuration file # @par EAGLE_END class ParameterSetDROP(AbstractDROP): From 0899eb0bdf730f03f8ec060aae74b9aad76f4ff1 Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 16:59:38 +0800 Subject: [PATCH 6/7] Added doxygen for SharedMemory --- daliuge-engine/dlg/drop.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/daliuge-engine/dlg/drop.py b/daliuge-engine/dlg/drop.py index 049b5717a..e2543f02d 100644 --- a/daliuge-engine/dlg/drop.py +++ b/daliuge-engine/dlg/drop.py @@ -1855,6 +1855,17 @@ def dataURL(self): return "mem://%s/%d/%d" % (hostname, os.getpid(), id(self._buf)) +## +# @brief SharedMemory +# @details Data stored in shared memory +# @par EAGLE_START +# @param category SharedMemory +# @param tag template +# @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ 
+# \~English Estimated size of the data contained in this node +# @param[in] cparam/group_end Group end/False/Boolean/readwrite/False//False/ +# \~English Is this node the end of a group? +# @par EAGLE_END class SharedMemoryDROP(DataDROP): """ A DROP that points to data stored in shared memory. From f0ecde20989bf69f95b263c9d26a9f2b24fc7948 Mon Sep 17 00:00:00 2001 From: james-strauss-uwa Date: Tue, 8 Mar 2022 17:08:07 +0800 Subject: [PATCH 7/7] Put PlasmaFlight component in the PlasmaFlight category --- daliuge-engine/dlg/drop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daliuge-engine/dlg/drop.py b/daliuge-engine/dlg/drop.py index e2543f02d..8c2ef9e1d 100644 --- a/daliuge-engine/dlg/drop.py +++ b/daliuge-engine/dlg/drop.py @@ -2174,7 +2174,7 @@ def dataURL(self): # @details An Apache Arrow Flight server providing distributed access # to a Plasma in-memory object store # @par EAGLE_START -# @param category Plasma +# @param category PlasmaFlight # @param tag template # @param[in] cparam/data_volume Data volume/5/Float/readwrite/False//False/ # \~English Estimated size of the data contained in this node