From a8442e22d4dd2b446c9b5f8c979871b71d349c3b Mon Sep 17 00:00:00 2001 From: Dmitry Frolov Date: Sat, 16 Mar 2024 15:16:02 -0400 Subject: [PATCH 1/2] Feat(plugins): adding KeepMachinesAfterStop flag propagation --- requirements/app/app.txt | 2 +- src/lightning/app/plugin/plugin.py | 6 ++++++ src/lightning/app/runners/cloud.py | 6 ++++++ tests/tests_app/plugin/test_plugin.py | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/requirements/app/app.txt b/requirements/app/app.txt index 3c7dbae240628..d2ccf8bb9d970 100644 --- a/requirements/app/app.txt +++ b/requirements/app/app.txt @@ -1,4 +1,4 @@ -lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility +lightning-cloud == 0.5.65 # Must be pinned to ensure compatibility packaging typing-extensions >=4.4.0, <4.10.0 deepdiff >=5.7.0, <6.6.0 diff --git a/src/lightning/app/plugin/plugin.py b/src/lightning/app/plugin/plugin.py index 68bcec0ed22e3..78165ba065d2d 100644 --- a/src/lightning/app/plugin/plugin.py +++ b/src/lightning/app/plugin/plugin.py @@ -46,6 +46,7 @@ def __init__(self) -> None: self.cloudspace_id = "" self.cluster_id = "" self.source_app = "" + self.keep_machines_after_stop = False def run(self, *args: str, **kwargs: str) -> Externalv1LightningappInstance: """Override with the logic to execute on the cloudspace.""" @@ -95,6 +96,7 @@ def run_job(self, name: str, app_entrypoint: str, env_vars: Dict[str, str] = {}) name=name, cluster_id=self.cluster_id, source_app=self.source_app, + keep_machines_after_stop=self.keep_machines_after_stop, ) def _setup( @@ -103,11 +105,13 @@ def _setup( cloudspace_id: str, cluster_id: str, source_app: str, + keep_machines_after_stop: bool, ) -> None: self.source_app = source_app self.project_id = project_id self.cloudspace_id = cloudspace_id self.cluster_id = cluster_id + self.keep_machines_after_stop = keep_machines_after_stop class _Run(BaseModel): @@ -118,6 +122,7 @@ class _Run(BaseModel): cluster_id: str plugin_arguments: Dict[str, str] 
source_app: str + keep_machines_after_stop: bool def _run_plugin(run: _Run) -> Dict[str, Any]: @@ -191,6 +196,7 @@ def _run_plugin(run: _Run) -> Dict[str, Any]: cloudspace_id=run.cloudspace_id, cluster_id=run.cluster_id, source_app=run.source_app, + keep_machines_after_stop=run.keep_machines_after_stop, ) app_instance = plugin.run(**run.plugin_arguments) return _to_clean_dict(app_instance, True) diff --git a/src/lightning/app/runners/cloud.py b/src/lightning/app/runners/cloud.py index f0d0fe224669b..6956dbe2566d6 100644 --- a/src/lightning/app/runners/cloud.py +++ b/src/lightning/app/runners/cloud.py @@ -197,6 +197,7 @@ def cloudspace_dispatch( name: str, cluster_id: str, source_app: Optional[str] = None, + keep_machines_after_stop: Optional[bool] = None, ) -> Externalv1LightningappInstance: """Slim dispatch for creating runs from a cloudspace. This dispatch avoids resolution of some properties such as the project and cluster IDs that are instead passed directly. @@ -206,6 +207,8 @@ def cloudspace_dispatch( cloudspace_id: The ID of the cloudspace. name: The name for the run. cluster_id: The ID of the cluster to run on. + source_app: Name of the source app that triggered the run. + keep_machines_after_stop: If true, machines will be left running after the run is finished (and can be reused after) Raises: ApiException: If there was an issue in the backend. 
@@ -278,6 +281,7 @@ def cloudspace_dispatch( queue_server_type, env_vars, source_app=source_app, + keep_machines_after_stop=keep_machines_after_stop, ) def dispatch( @@ -1016,6 +1020,7 @@ def _api_create_run_instance( env_vars: Optional[List[V1EnvVar]] = None, auth: Optional[V1LightningAuth] = None, source_app: Optional[str] = None, + keep_machines_after_stop: Optional[bool] = None, ) -> Externalv1LightningappInstance: """Create a new instance of the given run with the given specification.""" return self.backend.client.cloud_space_service_create_lightning_run_instance( @@ -1030,6 +1035,7 @@ def _api_create_run_instance( env=env_vars, auth=auth, source_app=source_app, + keep_machines_after_stop=keep_machines_after_stop, ), ) diff --git a/tests/tests_app/plugin/test_plugin.py b/tests/tests_app/plugin/test_plugin.py index a81acded8a78b..c4867af04dd2b 100644 --- a/tests/tests_app/plugin/test_plugin.py +++ b/tests/tests_app/plugin/test_plugin.py @@ -85,6 +85,7 @@ def run(self): cluster_id="any", plugin_arguments={}, source_app="any", + keep_machines_after_stop=False, ), "Error downloading plugin source:", None, @@ -99,6 +100,7 @@ def run(self): cluster_id="any", plugin_arguments={}, source_app="any", + keep_machines_after_stop=False, ), "Error extracting plugin source:", None, @@ -113,6 +115,7 @@ def run(self): cluster_id="any", plugin_arguments={}, source_app="any", + keep_machines_after_stop=False, ), "Error loading plugin:", "plugin.py", @@ -127,6 +130,7 @@ def run(self): cluster_id="any", plugin_arguments={}, source_app="any", + keep_machines_after_stop=False, ), "Error running plugin:", "plugin.py", @@ -175,6 +179,7 @@ def test_run_job(mock_requests, mock_cloud_runtime, mock_cloud_backend, mock_plu cluster_id="test_cluster_id", plugin_arguments={"name": "test_name", "entrypoint": "test_entrypoint"}, source_app="test_source_app", + keep_machines_after_stop=True, ) mock_app = mock.MagicMock() @@ -207,6 +212,7 @@ def test_run_job(mock_requests, mock_cloud_runtime, 
mock_cloud_backend, mock_plu name="test_name", cluster_id=body.cluster_id, source_app=body.source_app, + keep_machines_after_stop=body.keep_machines_after_stop, ) From e5dd2ee23951855d49663204eba2b7262bba73e2 Mon Sep 17 00:00:00 2001 From: Dmitry Frolov Date: Sat, 16 Mar 2024 15:29:21 -0400 Subject: [PATCH 2/2] Feat(plugins): adding KeepMachinesAfterStop flag propagation --- src/lightning/app/runners/cloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning/app/runners/cloud.py b/src/lightning/app/runners/cloud.py index 6956dbe2566d6..c488014450b9b 100644 --- a/src/lightning/app/runners/cloud.py +++ b/src/lightning/app/runners/cloud.py @@ -208,7 +208,7 @@ def cloudspace_dispatch( name: The name for the run. cluster_id: The ID of the cluster to run on. source_app: Name of the source app that triggered the run. - keep_machines_after_stop: If true, machines will be left running after the run is finished (and can be reused after) + keep_machines_after_stop: If true, machines are kept running after the run finishes so they can be reused Raises: ApiException: If there was an issue in the backend.