Skip to content

Commit

Permalink
Finally got the managers up and running correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
awicenec committed Feb 28, 2022
1 parent 4653caf commit 3c7ce66
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
12 changes: 6 additions & 6 deletions daliuge-engine/dlg/deploy/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,16 @@ class ICRARoodConfig(DefaultConfig):
"""
# The following is more of a workaround than a solution:
# it requires the user to have a venv in exactly that place.
VENV = """
source $HOME/dlg/venv/bin/activate
"""
ACCOUNT = os.environ['USER']
HOME_DIR = os.environ['HOME']
LOG_DIR = f"{HOME_DIR}/dlg/runs"
VENV = f"source {HOME_DIR}/dlg/venv/bin/activate"

def __init__(self):
    """Set up the configuration by running the parent class initialiser."""
    super().__init__()

def init_list(self): # TODO please fill in
HOME_DIR = os.environ['HOME']
ACCOUNT = os.environ['USER']
return [ACCOUNT, f"{HOME_DIR}/dlg/runs",
return [self.ACCOUNT, self.LOG_DIR,
self.MODULES,
self.VENV]

Expand Down
4 changes: 2 additions & 2 deletions daliuge-engine/dlg/deploy/slurm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,13 @@ def create_job_desc(self, physical_graph_file):
"""
log_dir = "{0}/{1}".format(self._log_root, self.get_log_dirname())
pardict = dict()
pardict["VENV"] = self.venv
pardict["NUM_NODES"] = str(self._num_nodes)
pardict["PIP_NAME"] = self._pip_name
pardict["SESSION_ID"] = os.path.split(log_dir)[-1]
pardict["JOB_DURATION"] = label_job_dur(self._job_dur)
pardict["ACCOUNT"] = self._acc
pardict["PY_BIN"] = sys.executable
pardict["PY_BIN"] = 'python3' if pardict["VENV"] else sys.executable
pardict["LOG_DIR"] = log_dir
pardict["GRAPH_PAR"] = (
'-L "{0}"'.format(self._logical_graph)
Expand All @@ -141,7 +142,6 @@ def create_job_desc(self, physical_graph_file):
pardict["ALL_NICS"] = "-u" if self._all_nics else ""
pardict["CHECK_WITH_SESSION"] = "-S" if self._check_with_session else ""
pardict["MODULES"] = self.modules
pardict["VENV"] = self.venv

job_desc = init_tpl.safe_substitute(pardict)
return job_desc
Expand Down
4 changes: 4 additions & 0 deletions daliuge-engine/dlg/deploy/start_dlg_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ def start_mm(node_list, log_dir, logv=1):


def _stop(endpoints):
LOGGER.info(f"Stopping ThreadPool")
def _the_stop(endpoint):
common.BaseDROPManagerClient(endpoint[0], endpoint[1]).stop()

Expand All @@ -230,14 +231,17 @@ def _the_stop(endpoint):


def stop_nms(ips):
    """
    Log the request and hand the node manager endpoints to ``_stop``.

    :param ips: iterable of node addresses; each one is paired with
        ``NODE_DEFAULT_REST_PORT`` to form an ``(address, port)`` endpoint.
    """
    LOGGER.info(f"Stopping node managers on nodes {ips}")
    node_endpoints = [(address, NODE_DEFAULT_REST_PORT) for address in ips]
    _stop(node_endpoints)


def stop_dims(ips):
    """
    Log the request and hand the island manager endpoints to ``_stop``.

    :param ips: iterable of node addresses; each one is paired with
        ``ISLAND_DEFAULT_REST_PORT`` to form an ``(address, port)`` endpoint.
    """
    LOGGER.info(f"Stopping island managers on nodes {ips}")
    island_endpoints = [(address, ISLAND_DEFAULT_REST_PORT) for address in ips]
    _stop(island_endpoints)


def stop_mm(ip_addr):
    """
    Log the request and hand the master manager endpoint to ``_stop``.

    :param ip_addr: address of the node; it is paired with
        ``MASTER_DEFAULT_REST_PORT`` to form the single endpoint passed on.
    """
    LOGGER.info(f"Stopping master managers on node {ip_addr}")
    master_endpoint = (ip_addr, MASTER_DEFAULT_REST_PORT)
    _stop([master_endpoint])


Expand Down

0 comments on commit 3c7ce66

Please sign in to comment.