Skip to content

Commit

Permalink
Update hpc.py (#40)
Browse files Browse the repository at this point in the history
* Update hpc.py

* Update hpc.py

* Update test_hpc.py
  • Loading branch information
Andrew-S-Rosen committed Sep 19, 2023
1 parent 106ecf6 commit 25785d0
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 8 deletions.
30 changes: 22 additions & 8 deletions covalent_hpc_plugin/hpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
"create_unique_workdir": False,
"cache_dir": str(Path(get_config("dispatcher.cache_dir")).expanduser().resolve()),
"poll_freq": 60,
"cleanup": True,
}
_DEFAULT = object()

Expand Down Expand Up @@ -147,6 +148,9 @@ class HPCExecutor(AsyncBaseExecutor):
Defaults to the dispatcher's cache directory.
poll_freq: Frequency with which to poll a submitted job. Defaults to 60. Note that settings this value
to be significantly smaller is not advised, as it will cause too frequent SSHs into the remote machine.
cleanup: Whether to clean up the temporary job submission files when done. Set this to False for debugging.
Note that temporary files will be made both in the `remote_workdir` and in `~/.psij`. The latter will
not be cleaned up by the plugin.
log_stdout: Path to file to log stdout to. Defaults to "" (i.e. no logging).
log_stderr: Path to file to log stderr to. Defaults to "" (i.e. no logging).
time_limit: time limit for the task (in seconds). Defaults to -1 (i.e. no time limit). Note that this is
Expand Down Expand Up @@ -180,6 +184,7 @@ def __init__(
create_unique_workdir: bool = _DEFAULT,
cache_dir: str | Path = _DEFAULT,
poll_freq: int = _DEFAULT,
cleanup: bool = _DEFAULT,
# AsyncBaseExecutor parameters
log_stdout: str = "",
log_stderr: str = "",
Expand Down Expand Up @@ -350,6 +355,14 @@ def __init__(
else _EXECUTOR_PLUGIN_DEFAULTS["poll_freq"]
)

self.cleanup = (
cleanup
if cleanup != _DEFAULT
else hpc_config["cleanup"]
if "cleanup" in hpc_config
else _EXECUTOR_PLUGIN_DEFAULTS["cleanup"]
)

if self.poll_freq < 30:
print("Polling frequency will be increased to 30 seconds.")
self.poll_freq = 30
Expand Down Expand Up @@ -887,14 +900,15 @@ async def teardown(self, task_metadata: dict) -> None:
Returns:
None
"""
app_log.debug("Performing cleanup on remote...")
conn = await self._client_connect()
await self._perform_cleanup(conn)

app_log.debug("Closing SSH connection...")
conn.close()
await conn.wait_closed()
app_log.debug("SSH connection closed, teardown complete")
if self.cleanup:
app_log.debug("Performing cleanup on remote...")
conn = await self._client_connect()
await self._perform_cleanup(conn)

app_log.debug("Closing SSH connection...")
conn.close()
await conn.wait_closed()
app_log.debug("SSH connection closed, teardown complete")

async def _perform_cleanup(self, conn: asyncssh.SSHClientConnection) -> None:
"""
Expand Down
3 changes: 3 additions & 0 deletions tests/test_hpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def test_init_defaults(tmpdir):
Path(get_config("dispatcher.cache_dir")).expanduser().resolve()
)
assert executor.poll_freq == 60
assert executor.cleanup is True


def test_init_nondefaults(tmpdir):
Expand Down Expand Up @@ -155,6 +156,7 @@ def test_init_nondefaults(tmpdir):
create_unique_workdir=True,
cache_dir=tmpdir / "my-cache-dir",
poll_freq=90,
cleanup=False,
)
assert executor.username == username
assert executor.address == address
Expand All @@ -172,6 +174,7 @@ def test_init_nondefaults(tmpdir):
assert executor.create_unique_workdir == True
assert executor.cache_dir == tmpdir / "my-cache-dir"
assert executor.poll_freq == 90
assert executor.cleanup is False
assert os.path.exists(tmpdir / "my-cache-dir")


Expand Down

0 comments on commit 25785d0

Please sign in to comment.