From caf3c05ecc6e86bebb5d8e259e86c818e9903e3f Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Fri, 10 Jun 2022 14:50:26 +0800 Subject: [PATCH 1/4] Cleanup imports On the one hand the isin function from numpy has no business here; on the other the Categories name was being imported twice. Signed-off-by: Rodrigo Tobar --- daliuge-engine/dlg/graph_loader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/daliuge-engine/dlg/graph_loader.py b/daliuge-engine/dlg/graph_loader.py index ffd231e23..f6a4145c8 100644 --- a/daliuge-engine/dlg/graph_loader.py +++ b/daliuge-engine/dlg/graph_loader.py @@ -30,8 +30,6 @@ from dlg.common.reproducibility.constants import ReproducibilityFlags -from numpy import isin - from . import droputils from .apps.socket_listener import SocketListenerApp from .common import Categories @@ -53,7 +51,7 @@ from dlg.parset_drop import ParameterSetDROP from .exceptions import InvalidGraphException from .json_drop import JsonDROP -from .common import Categories, DropType +from .common import DropType STORAGE_TYPES = { From 54df5952936eb0044c66b73b6a11e4fc1bd528b3 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Fri, 10 Jun 2022 14:28:29 +0800 Subject: [PATCH 2/4] Bring back actual relationship deletion This code was blindly changed along with the rest of the function body, but the change here doesn't make sense. Calling "del dict[key]" effectively removes that key from the dictionary, while obtaining the value for that key into a new variable and then deleting the variable (i.e., "x = dict[key]; del x") has no effect on the dictionary and simply wastes CPU cycles. Signed-off-by: Rodrigo Tobar --- daliuge-engine/dlg/graph_loader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/daliuge-engine/dlg/graph_loader.py b/daliuge-engine/dlg/graph_loader.py index f6a4145c8..6d9bdc6f0 100644 --- a/daliuge-engine/dlg/graph_loader.py +++ b/daliuge-engine/dlg/graph_loader.py @@ -188,9 +188,7 @@ def removeUnmetRelationships(dropSpecList): to_delete.append(rel) for rel in to_delete: - ds = dropSpec[rel] - ds = list(ds.keys())[0] if isinstance(ds, dict) else ds - del ds + del dropSpec[rel] return unmetRelationships From e3ab691fb2b606d1b763b10ba5caf910ab5ae143 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Fri, 10 Jun 2022 14:24:40 +0800 Subject: [PATCH 3/4] Remove unnecessary creation of list objects Creating lists out of iterators just to take the first element out is a serious performance problem when done in a tight loop. When adding graph specs to a session the Node Manager was doing exactly that though: some dictionaries' keys where list()-ified to then extract the first element. This commit replaces these list creations by a simpler approach, which is more performance: invoking next() on the iterable yields its first element. This doesn't require creating a new, short-living list object. The extra call to dict.keys() is also unnecessary, since when one iterates over dictionaries alone its keys are yielded. Signed-off-by: Rodrigo Tobar --- daliuge-engine/dlg/graph_loader.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/daliuge-engine/dlg/graph_loader.py b/daliuge-engine/dlg/graph_loader.py index 6d9bdc6f0..636e2123c 100644 --- a/daliuge-engine/dlg/graph_loader.py +++ b/daliuge-engine/dlg/graph_loader.py @@ -134,19 +134,19 @@ def addLink(linkType, lhDropSpec, rhOID, force=False): def removeUnmetRelationships(dropSpecList): unmetRelationships = [] + normalise_oid = lambda oid: next(iter(oid)) if isinstance(oid, dict) else oid + # Step #1: Get all OIDs oids = [] for dropSpec in dropSpecList: - oid = dropSpec["oid"] - oid = list(oid.keys())[0] if isinstance(oid, dict) else oid + oid = normalise_oid(dropSpec["oid"]) oids.append(oid) # Step #2: find unmet relationships and remove them from the original # DROP spec, keeping track of them for dropSpec in dropSpecList: - this_oid = dropSpec["oid"] - this_oid = list(this_oid.keys())[0] if isinstance(this_oid, dict) else this_oid + this_oid = normalise_oid(dropSpec["oid"]) to_delete = [] for rel in dropSpec: @@ -160,7 +160,7 @@ def removeUnmetRelationships(dropSpecList): # removing them from the current DROP spec ds = dropSpec[rel] if isinstance(ds[0], dict): - ds = [list(d.keys())[0] for d in ds] + ds = [next(iter(d)) for d in ds] missingOids = [oid for oid in ds if oid not in oids] for oid in missingOids: unmetRelationships.append(DROPRel(oid, link, this_oid)) @@ -176,8 +176,7 @@ def removeUnmetRelationships(dropSpecList): link = __TOONE[rel] # Check if OID is missing - oid = dropSpec[rel] - oid = list(oid.keys())[0] if isinstance(oid, dict) else oid + oid = normalise_oid(dropSpec[rel]) if oid in oids: continue From d2f1d39a88536b54f1acbc79795ff83314085ce9 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Fri, 10 Jun 2022 14:30:23 +0800 Subject: [PATCH 4/4] Store oids in set for faster lookups This function first collects all OIDs, and then checks different OIDs for existence in that collection. We have been using a list to store all OIDs though, making the lookups not as fast as they could have been. This commit changes the collection holding all OIDs from a list to a set. This speeds up the lookups of further OIDs in the rest of the method. Signed-off-by: Rodrigo Tobar --- daliuge-engine/dlg/graph_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daliuge-engine/dlg/graph_loader.py b/daliuge-engine/dlg/graph_loader.py index 636e2123c..6c21da681 100644 --- a/daliuge-engine/dlg/graph_loader.py +++ b/daliuge-engine/dlg/graph_loader.py @@ -137,10 +137,10 @@ def removeUnmetRelationships(dropSpecList): normalise_oid = lambda oid: next(iter(oid)) if isinstance(oid, dict) else oid # Step #1: Get all OIDs - oids = [] + oids = set() for dropSpec in dropSpecList: oid = normalise_oid(dropSpec["oid"]) - oids.append(oid) + oids.add(oid) # Step #2: find unmet relationships and remove them from the original # DROP spec, keeping track of them