From f7029b0f7ae9850c1b7fe89ac114989a72d9cb6c Mon Sep 17 00:00:00 2001 From: ankona <3595025+ankona@users.noreply.github.com> Date: Tue, 30 Jul 2024 19:25:01 -0500 Subject: [PATCH 1/3] Fix dragon install issue --- .github/workflows/run_tests.yml | 4 +- doc/changelog.md | 5 + smartsim/_core/_cli/scripts/dragon_install.py | 16 +-- tests/test_dragon_installer.py | 102 ++++++++++++++++-- 4 files changed, 111 insertions(+), 16 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 6f6648728e..b8e96f05bb 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -119,7 +119,9 @@ jobs: if: (contains( matrix.os, 'ubuntu' ) || contains( matrix.os, 'macos-12')) && ( matrix.subset == 'dragon' ) run: | smart build --device cpu --onnx --dragon -v - echo "LD_LIBRARY_PATH=$(python -c 'import site; print(site.getsitepackages()[0])')/smartsim/_core/.dragon/dragon-0.9/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV + SP=$(python -c 'import site; print(site.getsitepackages()[0])')/smartsim/_core/config/dragon/.env + LLP=$(cat $SP | grep LD_LIBRARY_PATH | awk '{split($0, array, "="); print array[2]}') + echo "LD_LIBRARY_PATH=$LLP:$LD_LIBRARY_PATH" >> $GITHUB_ENV - name: Install ML Runtimes with Smart (no ONNX,TF on Apple Silicon) if: contains( matrix.os, 'macos-14' ) diff --git a/doc/changelog.md b/doc/changelog.md index a954385cae..912bf369a0 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -30,6 +30,7 @@ To be released at some future point in time Description +- Fix dragon installation bug - Mitigate dependency installation issues - Fix internal host name representation for Dragon backend - Make dependencies more discoverable in setup.py @@ -43,6 +44,10 @@ Description Detailed Notes +- Fixed a bug in the dragon installer where a too-relaxed path is + searched for wheels. The resulting list may cause the wrong wheel to be + installed or may report failures when it attempts to install old wheels. + ([SmartSim-PR652](https://github.com/CrayLabs/SmartSim/pull/652)) - Installation of mypy or dragon in separate build actions caused some dependencies (typing_extensions, numpy) to be upgraded and caused runtime failures. The build actions were tweaked to include diff --git a/smartsim/_core/_cli/scripts/dragon_install.py b/smartsim/_core/_cli/scripts/dragon_install.py index a2e8ed36ff..53730a2601 100644 --- a/smartsim/_core/_cli/scripts/dragon_install.py +++ b/smartsim/_core/_cli/scripts/dragon_install.py @@ -155,15 +155,19 @@ def retrieve_asset(working_dir: pathlib.Path, asset: GitReleaseAsset) -> pathlib :param working_dir: location in file system where assets should be written :param asset: GitHub release asset to retrieve - :returns: path to the downloaded asset""" - if working_dir.exists() and list(working_dir.rglob("*.whl")): - return working_dir + :returns: path to the directory containing the extracted release asset""" + download_dir = working_dir / str(asset.id) + + # if we've previously downloaded the release and still have + # wheels laying around, use that cached version instead + if download_dir.exists() and list(download_dir.rglob("*.whl")): + return download_dir archive = WebTGZ(asset.browser_download_url) - archive.extract(working_dir) + archive.extract(download_dir) - logger.debug(f"Retrieved {asset.browser_download_url} to {working_dir}") - return working_dir + logger.debug(f"Retrieved {asset.browser_download_url} to {download_dir}") + return download_dir def install_package(asset_dir: pathlib.Path) -> int: diff --git a/tests/test_dragon_installer.py b/tests/test_dragon_installer.py index b23a1a7ef0..8b2f92921a 100644 --- a/tests/test_dragon_installer.py +++ b/tests/test_dragon_installer.py @@ -44,6 +44,7 @@ retrieve_asset, retrieve_asset_info, ) +from smartsim._core._install.builder import WebTGZ from smartsim.error.errors import SmartSimCLIActionCancelled # The tests in this file belong to the group_a group @@ -58,14 +59,25 @@ def test_archive(test_dir: str, archive_path: pathlib.Path) -> pathlib.Path: """Fixture for returning a simple tarfile to test on""" num_files = 10 + + archive_name = archive_path.name + archive_name = archive_name.replace(".tar.gz", "") + with tarfile.TarFile.open(archive_path, mode="w:gz") as tar: - mock_whl = pathlib.Path(test_dir) / "mock.whl" + mock_whl = pathlib.Path(test_dir) / archive_name / f"{archive_name}.whl" + mock_whl.parent.mkdir(parents=True, exist_ok=True) mock_whl.touch() + tar.add(mock_whl) + for i in range(num_files): - content = pathlib.Path(test_dir) / f"{i:04}.txt" + content = pathlib.Path(test_dir) / archive_name / f"{i:04}.txt" content.write_text(f"i am file {i}\n") tar.add(content) + content.unlink() + + mock_whl.unlink() + return archive_path @@ -118,6 +130,7 @@ def test_assets(monkeypatch: pytest.MonkeyPatch) -> t.Dict[str, GitReleaseAsset] _git_attr(value=f"http://foo/{archive_name}"), ) monkeypatch.setattr(asset, "_name", _git_attr(value=archive_name)) + monkeypatch.setattr(asset, "_id", _git_attr(value=123)) assets.append(asset) return assets @@ -149,11 +162,22 @@ def test_retrieve_cached( test_archive: pathlib.Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - """Verify that a previously retrieved asset archive is re-used""" - with tarfile.TarFile.open(test_archive) as tar: - tar.extractall(test_dir) + """Verify that a previously retrieved asset archive is re-used and the + release asset retrieval is not attempted""" - ts1 = test_archive.parent.stat().st_ctime + asset_id = 123 + + def mock_webtgz_extract(self_, target_) -> None: + mock_extraction_dir = pathlib.Path(target_) + with tarfile.TarFile.open(test_archive) as tar: + tar.extractall(mock_extraction_dir) + + # we'll use the mock extract to create the files that would normally be downloaded + expected_output_dir = test_archive.parent / str(asset_id) + mock_webtgz_extract(None, expected_output_dir) + + # get modification time of directory holding the "downloaded" archive + ts1 = test_archive.parent.stat().st_mtime requester = Requester( auth=None, @@ -174,16 +198,76 @@ def test_retrieve_cached( # ensure mocked asset has values that we use... monkeypatch.setattr(asset, "_browser_download_url", _git_attr(value="http://foo")) monkeypatch.setattr(asset, "_name", _git_attr(value=mock_archive_name)) + monkeypatch.setattr(asset, "_id", _git_attr(value=asset_id)) + # show that retrieving an asset w/a different ID results in ignoring + # other wheels from prior downloads in the parent directory of the asset asset_path = retrieve_asset(test_archive.parent, asset) - ts2 = asset_path.stat().st_ctime + ts2 = asset_path.stat().st_mtime + # NOTE: the file should be written to a subdir based on the asset ID assert ( - asset_path == test_archive.parent - ) # show that the expected path matches the output path + asset_path == expected_output_dir + ) # shows that the expected path matches the output path assert ts1 == ts2 # show that the file wasn't changed... +def test_retrieve_updated( + test_archive: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Verify that a previously retrieved asset archive is not re-used if a new + version is found""" + + old_asset_id = 100 + asset_id = 123 + + def mock_webtgz_extract(self_, target_) -> None: + mock_extraction_dir = pathlib.Path(target_) + with tarfile.TarFile.open(test_archive) as tar: + tar.extractall(mock_extraction_dir) + + # we'll use the mock extract to create the files that would normally be downloaded + expected_output_dir = test_archive.parent / str(asset_id) + old_output_dir = test_archive.parent / str(old_asset_id) + mock_webtgz_extract(None, old_output_dir) + + requester = Requester( + auth=None, + base_url="https://github.com", + user_agent="mozilla", + per_page=10, + verify=False, + timeout=1, + retry=1, + pool_size=1, + ) + headers = {"mock-header": "mock-value"} + attributes = {"mock-attr": "mock-attr-value"} + completed = True + + asset = GitReleaseAsset(requester, headers, attributes, completed) + + # ensure mocked asset has values that we use... + monkeypatch.setattr(asset, "_browser_download_url", _git_attr(value="http://foo")) + monkeypatch.setattr(asset, "_name", _git_attr(value=mock_archive_name)) + monkeypatch.setattr(asset, "_id", _git_attr(value=asset_id)) + monkeypatch.setattr( + WebTGZ, + "extract", + lambda s_, t_: mock_webtgz_extract(s_, expected_output_dir), + ) # mock the retrieval of the updated archive + + # tell it to retrieve. it should return the path to the new download, not the old one + asset_path = retrieve_asset(test_archive.parent, asset) + + # sanity check we don't have the same paths + assert old_output_dir != expected_output_dir + + # verify the "cached" copy wasn't used + assert asset_path == expected_output_dir + + @pytest.mark.parametrize( "dragon_pin,pyv,is_found,is_crayex", [ From 2617d6a3b12b2a05db5d7755bb1e8fd7fcc683c0 Mon Sep 17 00:00:00 2001 From: ankona <3595025+ankona@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:52:40 -0500 Subject: [PATCH 2/3] avoid force-reinstall --- doc/changelog.md | 6 +---- smartsim/_core/_cli/scripts/dragon_install.py | 24 +++++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/doc/changelog.md b/doc/changelog.md index 912bf369a0..0ada4e4ec3 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -13,6 +13,7 @@ Jump to: Description +- Fix dragon package installation bug - Adjust schemas for better performance - Add TorchWorker first implementation and mock inference app example - Add error handling in Worker Manager pipeline @@ -30,7 +31,6 @@ To be released at some future point in time Description -- Fix dragon installation bug - Mitigate dependency installation issues - Fix internal host name representation for Dragon backend - Make dependencies more discoverable in setup.py @@ -44,10 +44,6 @@ Description Detailed Notes -- Fixed a bug in the dragon installer where a too-relaxed path is - searched for wheels. The resulting list may cause the wrong wheel to be - installed or may report failures when it attempts to install old wheels. - ([SmartSim-PR652](https://github.com/CrayLabs/SmartSim/pull/652)) - Installation of mypy or dragon in separate build actions caused some dependencies (typing_extensions, numpy) to be upgraded and caused runtime failures. The build actions were tweaked to include diff --git a/smartsim/_core/_cli/scripts/dragon_install.py b/smartsim/_core/_cli/scripts/dragon_install.py index 53730a2601..03a128ab86 100644 --- a/smartsim/_core/_cli/scripts/dragon_install.py +++ b/smartsim/_core/_cli/scripts/dragon_install.py @@ -174,23 +174,21 @@ def install_package(asset_dir: pathlib.Path) -> int: """Install the package found in `asset_dir` into the current python environment :param asset_dir: path to a decompressed archive contents for a release asset""" - wheels = asset_dir.rglob("*.whl") - wheel_path = next(wheels, None) - if not wheel_path: - logger.error(f"No wheel found for package in {asset_dir}") + found_wheels = list(asset_dir.rglob("*.whl")) + if not found_wheels: + logger.error(f"No wheel(s) found for package in {asset_dir}") return 1 - create_dotenv(wheel_path.parent) + create_dotenv(found_wheels[0].parent) - while wheel_path is not None: - logger.info(f"Installing package: {wheel_path.absolute()}") + try: + wheels = list(map(str, found_wheels)) + logger.info("Installing packages:\n%s", "\n".join(wheels)) - try: - pip("install", "--force-reinstall", str(wheel_path), "numpy<2") - wheel_path = next(wheels, None) - except Exception: - logger.error(f"Unable to install from {asset_dir}") - return 1 + pip("install", *wheels) + except Exception: + logger.error(f"Unable to install from {asset_dir}") + return 1 return 0 From 43867432c1adcaf6808c5a19c26f765638544c18 Mon Sep 17 00:00:00 2001 From: Christopher McBride <3595025+ankona@users.noreply.github.com> Date: Thu, 1 Aug 2024 19:31:14 -0400 Subject: [PATCH 3/3] test fix --- tests/test_dragon_installer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dragon_installer.py b/tests/test_dragon_installer.py index 8b2f92921a..4bf589ad4c 100644 --- a/tests/test_dragon_installer.py +++ b/tests/test_dragon_installer.py @@ -177,7 +177,7 @@ def mock_webtgz_extract(self_, target_) -> None: mock_webtgz_extract(None, expected_output_dir) # get modification time of directory holding the "downloaded" archive - ts1 = test_archive.parent.stat().st_mtime + ts1 = expected_output_dir.stat().st_ctime requester = Requester( auth=None, @@ -203,7 +203,7 @@ def mock_webtgz_extract(self_, target_) -> None: # show that retrieving an asset w/a different ID results in ignoring # other wheels from prior downloads in the parent directory of the asset asset_path = retrieve_asset(test_archive.parent, asset) - ts2 = asset_path.stat().st_mtime + ts2 = asset_path.stat().st_ctime # NOTE: the file should be written to a subdir based on the asset ID assert (