-
Notifications
You must be signed in to change notification settings - Fork 194
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make HTEX scale-down be aware of unstarted blocks (#3353)
Prior to this PR, the scale_in code for the HighThroughputExecutor would not scale in any block that had not had at least one manager register with the interchange, because it retrieved the list of blocks from the interchange. This is documented in issue #3232. This PR makes the htex scale-in code also pay attention to blocks in the status_facade list, which includes blocks that have been submitted and blocks that have been reported by the provider mechanism.
- Loading branch information
1 parent
e155bd1
commit e4b3c3b
Showing
2 changed files
with
88 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
parsl/tests/test_scaling/test_scale_down_htex_unregistered.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import logging | ||
import time | ||
|
||
import pytest | ||
|
||
import parsl | ||
|
||
from parsl import File, python_app | ||
from parsl.jobs.states import JobState, TERMINAL_STATES | ||
from parsl.providers import LocalProvider | ||
from parsl.channels import LocalChannel | ||
from parsl.launchers import SingleNodeLauncher | ||
from parsl.config import Config | ||
from parsl.executors import HighThroughputExecutor | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
# Block limits handed to the LocalProvider (max_blocks / min_blocks) in local_config(). | ||
_max_blocks = 1 | ||
_min_blocks = 0 | ||
|
||
|
||
def local_config():
    """Return the parsl ``Config`` used by the scale-down test in this file.

    The config holds a single ``HighThroughputExecutor`` labelled
    ``htex_local`` backed by a ``LocalProvider`` that starts with one block
    and may scale between ``_min_blocks`` and ``_max_blocks``.

    The executor's ``launch_cmd`` is overridden with ``sleep inf``;
    presumably this keeps the block alive without any manager ever
    registering, which is the situation the test wants to exercise
    (NOTE(review): confirm against the htex launch machinery).

    Scaling uses the ``htex_auto_scale`` strategy with a 0.1 ``strategy_period``
    and a 0.5 ``max_idletime``.
    """
    # Provider that launches blocks on the local machine.
    provider = LocalProvider(
        channel=LocalChannel(),
        init_blocks=1,
        max_blocks=_max_blocks,
        min_blocks=_min_blocks,
        launcher=SingleNodeLauncher(),
    )

    executor = HighThroughputExecutor(
        heartbeat_period=1,
        heartbeat_threshold=2,
        poll_period=100,
        label="htex_local",
        address="127.0.0.1",
        max_workers=1,
        encrypted=True,
        # Replaces the normal worker launch command with a plain sleep.
        launch_cmd="sleep inf",
        provider=provider,
    )

    return Config(
        executors=[executor],
        max_idletime=0.5,
        strategy='htex_auto_scale',
        strategy_period=0.1,
    )
|
||
|
||
# see issue #1885 for details of failures of this test. | ||
# at the time of issue #1885 this test was failing frequently | ||
# in CI. | ||
@pytest.mark.local
def test_scaledown_with_register(try_assert):
    """Check that an idle block with no connected manager is scaled in.

    Steps:
      1. Assert that the ``htex_local`` executor has no connected managers.
      2. Wait until the executor reports at least one block, then assert
         that block ``'0'`` is in ``JobState.RUNNING``.
      3. Wait (up to 15 seconds) for that same block to reach one of the
         ``TERMINAL_STATES`` while still being the only block reported.

    ``try_assert`` is a fixture supplied by the test suite; it is called here
    with a predicate plus ``fail_msg`` / ``timeout_ms`` keyword arguments.
    """
    htex = parsl.dfk().executors['htex_local']

    assert len(htex.connected_managers()) == 0, "Expected 0 managers at start"

    # A non-zero block count is truthy, so this waits for the first block.
    try_assert(lambda: len(htex.status()),
               fail_msg="Expected 1 block at start")

    initial_status = htex.status()
    assert initial_status['0'].state == JobState.RUNNING, "Expected block to be in RUNNING"

    def check_zero_blocks():
        # The block stays in the status table after scale-in; what changes
        # is that its state becomes terminal.
        blocks = htex.status()
        if len(blocks) != 1:
            return False
        return blocks['0'].state in TERMINAL_STATES

    try_assert(
        check_zero_blocks,
        fail_msg="Expected 0 blocks after idle scaledown",
        timeout_ms=15000,
    )