Liu 349 #226

Closed

wants to merge 29 commits

Changes shown from 20 of the 29 commits.

Commits
8138a4d
added a few more properties
awicenec Apr 17, 2023
a1a2ac7
Removed "nm", using "name" instead
awicenec Apr 17, 2023
325d9cb
Fixed environmentvars tests
awicenec Apr 17, 2023
c4b0a58
Fixed pg_gen test, removed comment lines
awicenec Apr 17, 2023
ab34e25
Changed loop_cxt to loop_ctx
awicenec Apr 17, 2023
e77e76e
Changed from "nm" to "name" on the node level
awicenec Apr 17, 2023
98507b0
replaced tw and dw with weight
awicenec Apr 17, 2023
e64bdfb
changed to drop_type and size from dt and sz
awicenec Apr 17, 2023
ad710e0
Changed from node.text to node.name everywhere
awicenec Apr 18, 2023
6768032
Fixed loop context
awicenec Apr 18, 2023
0773f4e
Fixed display of node name in DIM
awicenec Apr 18, 2023
1de826e
adjusting doxygen strings
awicenec Apr 18, 2023
8e109c7
Got node name and port names working
awicenec Apr 19, 2023
8fd50ed
Minor modifications
awicenec Apr 19, 2023
a3897e9
Adjusted tests
awicenec Apr 19, 2023
f7cba58
Fix python version
awicenec Apr 20, 2023
cce875d
More options to support key field names
awicenec Apr 20, 2023
01dc80f
name string for sankey; S3DROP; fix python version for tests
awicenec Apr 20, 2023
97399e7
name string for sankey; S3DROP; fix python version for tests
awicenec Apr 20, 2023
defb95c
Fixed issues with some example graphs
awicenec Apr 21, 2023
2acc47c
Cleanup of function args treatment
awicenec Apr 27, 2023
0b7368d
Fixed issues with named ports resolution
awicenec Apr 28, 2023
fc22834
Fixed test failures
awicenec Apr 28, 2023
6005d0a
added missing dependency.
awicenec May 1, 2023
f890125
Added netifaces to dependencies
awicenec May 1, 2023
0b05efd
Changed sequence for updating arguments
awicenec May 1, 2023
35a58a9
Calling correct prepareUsers function
awicenec May 1, 2023
3cf790c
Merge branch 'liu-348' into liu-349
awicenec May 2, 2023
412496b
Removed stray debug message
awicenec May 2, 2023
17 changes: 8 additions & 9 deletions .github/workflows/run-unit-tests.yml
@@ -3,24 +3,23 @@ name: Run unit tests
on: [push, pull_request]

jobs:

run_tests:
name: Run unit tests with python ${{matrix.python-version}} - ${{ matrix.desc }}
runs-on: ubuntu-20.04
strategy:
matrix:
include:
- python-version: '3.8'
- python-version: "3.8.10"
test_number: 0
engine: no
translator: yes
desc: "no engine"
- python-version: '3.8'
- python-version: "3.8.10"
test_number: 1
desc: "no translator"
engine: yes
translator: no
- python-version: '3.9'
- python-version: "3.9"
test_number: 2
desc: "full package"
engine: yes
@@ -81,8 +80,8 @@ jobs:
needs: run_tests
runs-on: ubuntu-20.04
steps:
- name: Coveralls Finished
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
parallel-finished: true
6 changes: 6 additions & 0 deletions daliuge-common/dlg/common/__init__.py
@@ -40,6 +40,7 @@ class DropType:
class CategoryType:
DATA = "Data"
APPLICATION = "Application"
CONSTRUCT = "Construct"
GROUP = "Group"
UNKNOWN = "Unknown"
SERVICE = "Service"
@@ -81,7 +82,12 @@ def _addSomething(self, other, key, IdText=None):
if key not in self:
self[key] = []
if other["oid"] not in self[key]:
# TODO: Returning just the other drop OID instead of the named
# port list is not a good solution. Required for the dask
# tests.
append = {other["oid"]: IdText} if IdText else other["oid"]
# if IdText is None:
# raise ValueError
self[key].append(append)

def addConsumer(self, other, IdText=None):
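For readers of the TODO above, a minimal standalone sketch (not part of the diff; OIDs and port names are made up) of the two entry shapes this change appends: a one-element {oid: port-name} dict when a named port is known, and a bare OID string otherwise.

def append_entry(entries, other_oid, id_text=None):
    # Mirrors the hunk above: store {oid: port-name} when a named port
    # is known, otherwise the bare OID string (the bare form is what
    # the dask tests currently rely on).
    entry = {other_oid: id_text} if id_text else other_oid
    entries.append(entry)
    return entries

consumers = []
append_entry(consumers, "oid_A", "input0")
append_entry(consumers, "oid_B")
print(consumers)  # [{'oid_A': 'input0'}, 'oid_B']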
2 changes: 1 addition & 1 deletion daliuge-engine/dlg/apps/app_base.py
@@ -452,7 +452,7 @@ def execute(self, _send_notifications=True):
return
tries += 1
logger.exception(
"Error while executing %r (try %d/%d)",
"Error while executing %r (try %s/%s)",
self,
tries,
self.n_tries,
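A small self-contained illustration of why %s is the safer conversion here, under the assumption that tries or n_tries can be a non-integer such as None: the logging module formats lazily, so a %d conversion fails at emit time and the message is reported as a logging error and dropped, while %s stringifies any value.

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("demo")

tries, n_tries = 1, None  # hypothetical: retry limit not configured
log.info("try %s/%s", tries, n_tries)  # logs "try 1/None"
log.info("try %d/%d", tries, n_tries)  # emit-time TypeError; message lost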
11 changes: 9 additions & 2 deletions daliuge-engine/dlg/apps/pyfunc.py
@@ -501,7 +501,7 @@ def optionalEval(x):

# if we have named ports use the inputs with
# the correct UIDs
logger.debug(f"Parameters found: {self.parameters}")
# logger.debug(f"Parameters found: {self.parameters}")
posargs = self.arguments.args[: self.fn_npos]
keyargs = self.arguments.args[self.fn_npos :]
kwargs = {}
@@ -702,8 +702,15 @@ def optionalEval(x):
logger.debug(f"updating funcargs with {kwargs}")
funcargs.update(kwargs)
self._recompute_data["args"] = funcargs.copy()
logger.debug(f"Running {self.func_name} with *{pargs} **{funcargs}")

if (
self.func_name is not None
and self.func_name.split(".")[-1] in ["__init__", "__class__"]
and "self" in funcargs
):
# remove self if this is the initializer.
funcargs.pop("self")
logger.debug(f"Running {self.func_name} with *{pargs} **{funcargs}")
# we capture and log whatever is produced on STDOUT
capture = StringIO()
with redirect_stdout(capture):
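A standalone sketch of the initializer case the new hunk guards against, using a hypothetical class: when the wrapped callable is an unbound __init__ and the keyword arguments were collected by parameter name, the collected "self" entry has to be removed before the call, because instantiation supplies self implicitly.

class Scale:  # hypothetical component class
    def __init__(self, factor):
        self.factor = factor

func_name = "Scale.__init__"
funcargs = {"self": None, "factor": 3}  # kwargs collected by name

if func_name.split(".")[-1] in ("__init__", "__class__") and "self" in funcargs:
    funcargs.pop("self")  # Scale(**funcargs) provides self itself

obj = Scale(**funcargs)
assert obj.factor == 3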
180 changes: 96 additions & 84 deletions daliuge-engine/dlg/apps/simple.py

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions daliuge-engine/dlg/dask_emulation.py
@@ -111,11 +111,11 @@ def compute(value, **kwargs):
{
"categoryType": "Application",
# "categoryType": CategoryType.APPLICATION,
"Application": "dlg.dask_emulation.ResultTransmitter",
# "Application": "dlg.dask_emulation.ResultTransmitter",
"appclass": "dlg.dask_emulation.ResultTransmitter",
"oid": transmitter_oid,
"uid": transmitter_oid,
"port": port,
"nm": "result transmitter",
"name": "result transmitter",
}
)
@@ -286,7 +286,6 @@ def make_dropdict(self):
{
"categoryType": "Application",
"appclass": "dlg.dask_emulation._Listifier",
"nm": "listifier",
"name": "listifier",
}
)
@@ -322,7 +321,6 @@ def make_dropdict(self):
if self.fname is not None:
simple_fname = self.fname.split(".")[-1]
my_dropdict["func_name"] = self.fname
my_dropdict["nm"] = simple_fname
my_dropdict["name"] = simple_fname
if self.fcode is not None:
my_dropdict["func_code"] = utils.b2s(base64.b64encode(self.fcode))
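The effect of this rename on the generated drop dictionaries, as a minimal before/after sketch (field values are illustrative only):

# Before this PR the emulation layer emitted the short key:
old_dropdict = {"categoryType": "Application", "nm": "listifier"}
# After it, only the long form is used:
new_dropdict = {"categoryType": "Application", "name": "listifier"}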
1 change: 1 addition & 0 deletions daliuge-engine/dlg/data/drops/data_base.py
@@ -26,6 +26,7 @@
from typing import Union

from dlg.ddap_protocol import DROPStates

from dlg.drop import AbstractDROP, track_current_drop
from dlg.data.io import (
DataIO,
6 changes: 6 additions & 0 deletions daliuge-engine/dlg/data/drops/parset_drop.py
@@ -27,6 +27,10 @@
from dlg.data.io import MemoryIO
from dlg.meta import dlg_string_param

from logging import Logger

logger = Logger("__main__")


##
# @brief ParameterSet
@@ -40,6 +44,7 @@
# @param config_data ConfigData/""/String/ComponentParameter/readwrite//False/False/Additional configuration information to be mixed in with the initial data
# @param streaming Streaming/False/Boolean/ComponentParameter/readwrite//False/False/Specifies whether this data component streams input and output data
# @param persist Persist/False/Boolean/ComponentParameter/readwrite//False/False/Specifies whether this data component contains data that should not be deleted after execution
# @param dataclass dataclass//dlg.data.drops.parset_drop.ParameterSetDROP//readonly//False/False/default class for this DROP
# @param Config ConfigFile//Object.File/OutputPort/readwrite//False/False/The output configuration file
# @par EAGLE_END
class ParameterSetDROP(DataDROP):
@@ -85,6 +90,7 @@ def initialize(self, **kwargs):
self.config_data = self.serialize_parameters(
self.filter_parameters(self.parameters, self.mode), self.mode
).encode("utf-8")
logger.debug(">>>> config_data: %s", self.config_data)

def getIO(self):
return MemoryIO(io.BytesIO(self.config_data))
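As an aside on the logger added at the top of this file: it instantiates logging.Logger directly, which creates a logger outside the usual hierarchy, so handlers configured on the root logger do not apply to it. The conventional module-level pattern, shown here only as a reference and not as what the PR does, is:

import logging

# Standard module logger: participates in the logging hierarchy and
# inherits handlers/levels configured on the root logger.
logger = logging.getLogger(__name__)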
1 change: 0 additions & 1 deletion daliuge-engine/dlg/data/drops/s3_drop.py
@@ -146,7 +146,6 @@ def __init__(
expectedSize=-1,
**kwargs,
):

super().__init__(**kwargs)

logger.debug(
60 changes: 42 additions & 18 deletions daliuge-engine/dlg/deploy/create_dlg_job.py
@@ -35,8 +35,13 @@
import time
import os

from dlg.deploy.configs import ConfigFactory # get all available configurations
from dlg.deploy.deployment_constants import DEFAULT_AWS_MON_PORT, DEFAULT_AWS_MON_HOST
from dlg.deploy.configs import (
ConfigFactory,
) # get all available configurations
from dlg.deploy.deployment_constants import (
DEFAULT_AWS_MON_PORT,
DEFAULT_AWS_MON_HOST,
)
from dlg.deploy.slurm_client import SlurmClient

FACILITIES = ConfigFactory.available()
@@ -50,7 +55,10 @@ def get_timestamp(line):
date_time = "{0}T{1}".format(split[0], split[1])
pattern = "%Y-%m-%dT%H:%M:%S,%f"
epoch = time.mktime(time.strptime(date_time, pattern))
return datetime.datetime.strptime(date_time, pattern).microsecond / 1e6 + epoch
return (
datetime.datetime.strptime(date_time, pattern).microsecond / 1e6
+ epoch
)


class LogEntryPair:
@@ -137,7 +145,8 @@ def build_nm_log_entry_pairs():
def construct_catchall_pattern(node_type):
pattern_strs = LogParser.kwords.get(node_type)
patterns = [
x.format(".*").replace("(", r"\(").replace(")", r"\)") for x in pattern_strs
x.format(".*").replace("(", r"\(").replace(")", r"\)")
for x in pattern_strs
]
catchall = "|".join(["(%s)" % (s,) for s in patterns])
catchall = ".*(%s).*" % (catchall,)
@@ -205,15 +214,20 @@ class LogParser:

kwords = dict()
kwords["dim"] = dim_kl
kwords["nm"] = nm_kl
kwords["name"] = nm_kl

def __init__(self, log_dir):
self._dim_log_f = None
if not self.check_log_dir(log_dir):
raise Exception("No DIM log found at: {0}".format(log_dir))
self._log_dir = log_dir
self._dim_catchall_pattern = construct_catchall_pattern(node_type="dim")
self._nm_catchall_pattern = construct_catchall_pattern(node_type="nm")
self._dim_catchall_pattern = construct_catchall_pattern(
node_type="dim"
)
# self._nm_catchall_pattern = construct_catchall_pattern(node_type="nm")
self._nm_catchall_pattern = construct_catchall_pattern(
node_type="name"
)

def parse(self, out_csv=None):
"""
@@ -271,11 +285,14 @@ def parse(self, out_csv=None):

num_dims = 0
for log_directory_file_name in os.listdir(self._log_dir):

# Check this is a dir and contains the NM log
if not os.path.isdir(os.path.join(self._log_dir, log_directory_file_name)):
if not os.path.isdir(
os.path.join(self._log_dir, log_directory_file_name)
):
continue
nm_logf = os.path.join(self._log_dir, log_directory_file_name, "dlgNM.log")
nm_logf = os.path.join(
self._log_dir, log_directory_file_name, "dlgNM.log"
)
nm_dim_logf = os.path.join(
self._log_dir, log_directory_file_name, "dlgDIM.log"
)
@@ -303,7 +320,6 @@

# Looking for the deployment times and counting for finished sessions
for lep in nm_log_pairs:

# Consider only valid durations
dur = lep.get_duration()
if dur is None:
@@ -339,7 +355,6 @@
# effect
max_exec_time = 0
for log_entry_pairs in nm_logs:

indexed_leps = {lep.name: lep for lep in log_entry_pairs}
deploy_time = indexed_leps["node_deploy_time"].get_duration()
if deploy_time is None: # since some node managers failed to start
@@ -366,7 +381,9 @@
git_commit,
]
ret = [str(x) for x in ret]
num_dims = num_dims if num_dims == 1 else num_dims - 1 # exclude master manager
num_dims = (
num_dims if num_dims == 1 else num_dims - 1
) # exclude master manager
add_line = ",".join(ret + temp_dim + temp_nm + [str(int(num_dims))])
if out_csv is not None:
with open(out_csv, "a") as out_file:
Expand All @@ -384,7 +401,9 @@ def check_log_dir(self, log_dir):
if os.path.exists(dim_log_f):
self._dim_log_f = [dim_log_f]
if dim_log_f == possible_logs[0]:
cluster_log = os.path.join(log_dir, "0", "start_dlg_cluster.log")
cluster_log = os.path.join(
log_dir, "0", "start_dlg_cluster.log"
)
if os.path.exists(cluster_log):
self._dim_log_f.append(cluster_log)
return True
@@ -585,7 +604,9 @@ def main():
if not (opts.action and opts.facility) and not opts.configs:
parser.error("Missing required parameters!")
if opts.facility not in FACILITIES:
parser.error(f"Unknown facility provided. Please choose from {FACILITIES}")
parser.error(
f"Unknown facility provided. Please choose from {FACILITIES}"
)

if opts.action == 2:
if opts.log_dir is None:
@@ -609,20 +630,23 @@ def main():
log_parser = LogParser(log_dir)
log_parser.parse(out_csv=opts.csv_output)
except Exception as exp:
print("Fail to parse {0}: {1}".format(log_dir, exp))
print(
"Fail to parse {0}: {1}".format(log_dir, exp)
)
else:
log_parser = LogParser(opts.log_dir)
log_parser.parse(out_csv=opts.csv_output)
elif opts.action == 1:

if opts.logical_graph and opts.physical_graph:
parser.error(
"Either a logical graph or physical graph filename must be specified"
)
for path_to_graph_file in (opts.logical_graph, opts.physical_graph):
if path_to_graph_file and not os.path.exists(path_to_graph_file):
parser.error(
"Cannot locate graph file at '{0}'".format(path_to_graph_file)
"Cannot locate graph file at '{0}'".format(
path_to_graph_file
)
)

client = SlurmClient(
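To make the reflowed get_timestamp from this file easy to check in isolation, here is a runnable copy with a sample log line; the line format is assumed from the pattern used in the diff.

import datetime
import time

def get_timestamp(line):
    # "2023-05-01 12:00:00,250 ..." -> float seconds since the epoch:
    # whole seconds come from time.mktime, the sub-second part from datetime.
    split = line.split()
    date_time = "{0}T{1}".format(split[0], split[1])
    pattern = "%Y-%m-%dT%H:%M:%S,%f"
    epoch = time.mktime(time.strptime(date_time, pattern))
    return (
        datetime.datetime.strptime(date_time, pattern).microsecond / 1e6
        + epoch
    )

print(get_timestamp("2023-05-01 12:00:00,250 INFO session started"))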