Fix path interpolation between download and fetch #178

Merged: 3 commits, Jul 15, 2022
3 changes: 3 additions & 0 deletions changelog/178.bugfix.rst
@@ -0,0 +1,3 @@
Unify path handling between `.FileManager.download` and `.DKISTClient.fetch`.
This means that you can use the same path specification to download the ASDF
files and the FITS files, using keys such as "Dataset ID".
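
A hedged sketch of the unified behaviour this entry describes. The `{Dataset ID}` key comes from this PR's tests; the dataset ID, directory layout, and the `Dataset.from_directory` loader are illustrative assumptions, not part of this diff:

```python
from sunpy.net import Fido, attrs as a

import dkist
import dkist.net  # registers the DKIST Fido client

# Fetch the dataset's ASDF file, interpolating the "Dataset ID" key
# into the download path (the dataset ID here is hypothetical):
results = Fido.search(a.dkist.Dataset("AGLKO"))
Fido.fetch(results, path="~/dkist/{Dataset ID}")

# The same path specification now works for the FITS files:
ds = dkist.Dataset.from_directory("~/dkist/AGLKO")
ds.files.download(path="~/dkist/{Dataset ID}")
```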
58 changes: 0 additions & 58 deletions dkist/dataset/tests/test_dataset.py
@@ -10,7 +10,6 @@
import gwcs
from astropy.tests.helper import assert_quantity_allclose

from dkist import net
from dkist.data.test import rootdir
from dkist.dataset import Dataset
from dkist.io import FileManager
@@ -116,60 +115,3 @@ def test_file_manager():

def test_no_file_manager(dataset_3d):
assert dataset_3d.files is None


@pytest.fixture
def orchestrate_transfer_mock(mocker):
yield mocker.patch("dkist.io.file_manager._orchestrate_transfer_task",
autospec=True)


def test_download_default_keywords(dataset, orchestrate_transfer_mock):
base_path = Path(net.conf.dataset_path.format(**dataset.meta["inventory"]))
folder = Path("/{bucket}/{primaryProposalId}/{datasetId}/".format(**dataset.meta["inventory"]))
file_list = dataset.files.filenames + [folder / "test_dataset.asdf",
folder / "test_dataset.mp4",
folder / "test_dataset.pdf"]
file_list = [base_path / fn for fn in file_list]

dataset.files.download()

orchestrate_transfer_mock.assert_called_once_with(
file_list,
recursive=False,
destination_path=Path('/~'),
destination_endpoint=None,
progress=True,
wait=True,
)


@pytest.mark.parametrize("keywords", [
{"progress": True, "wait": True, "destination_endpoint": None},
{"progress": True, "wait": False, "destination_endpoint": None},
{"progress": False, "wait": True, "destination_endpoint": None},
{"progress": False, "wait": True, "destination_endpoint": "wibble"},
])
def test_download_keywords(dataset, orchestrate_transfer_mock, keywords):
"""
Assert that keywords are passed through as expected
"""
base_path = Path(net.conf.dataset_path.format(**dataset.meta["inventory"]))

folder = Path("/{bucket}/{primaryProposalId}/{datasetId}/".format(**dataset.meta["inventory"]))
file_list = dataset.files.filenames + [folder / "test_dataset.asdf",
folder / "test_dataset.mp4",
folder / "test_dataset.pdf"]
file_list = [base_path / fn for fn in file_list]

dataset.files.download(path="/test/", **keywords)

orchestrate_transfer_mock.assert_called_once_with(
file_list,
recursive=False,
destination_path=Path('/test'),
**keywords
)

if not keywords["destination_endpoint"]:
assert dataset.files.basepath == Path("/test/")
11 changes: 9 additions & 2 deletions dkist/io/file_manager.py
@@ -33,6 +33,7 @@
from dkist.io.loaders import BaseFITSLoader
from dkist.net import conf as net_conf
from dkist.net.helpers import _orchestrate_transfer_task
from dkist.utils.inventory import humanize_inventory


class BaseStripedExternalArray:
@@ -328,6 +329,11 @@ def download(self, path=None, destination_endpoint=None, progress=True, wait=Tru
``ds.meta['inventory']``. An example of this would be
``path="~/dkist/{datasetId}"`` to save the files in a folder named
with the dataset ID being downloaded.
If ``path`` is specified, and ``destination_endpoint`` is `None`
(i.e. you are downloading to a local Globus personal endpoint) this
method will set ``.basepath`` to the value of the ``path=``
argument, so that the array can be read from your transferred
files.

destination_endpoint : `str`, optional
A unique specifier for a Globus endpoint. If `None` a local
@@ -352,11 +358,12 @@ def download(self, path=None, destination_endpoint=None, progress=True, wait=Tru
)

inv = self._ndcube.meta["inventory"]
human_inv = humanize_inventory(inv)

base_path = Path(net_conf.dataset_path.format(**inv))
destination_path = path or self.basepath or "/~/"
destination_path = Path(destination_path).as_posix()
destination_path = Path(destination_path.format(**inv))
destination_path = Path(destination_path.format(**human_inv))
Contributor: Not strictly related to this PR, per se, but it might be nice to have some custom error handling if a user tries to interpolate a key that doesn't exist; right now I'm fairly certain it would just throw `KeyError: 'foo'`. Even something like a quick note on how to show available keys would probably help a lot.

Member (author): If there's a new key in the inventory but it's not in the map, it just copies it through without translating it, if that's what you mean.

Contributor: Interesting. I think I see. Are you saying that if a user has `destination_path = "{datasetID}_{somekey}_data"` then it will interpolate to, e.g., `"YAFMS_{somekey}_data"`? If so then I'm fine with that.

Member (author): Oh no, I was mistaken. You can only use keys which exist. For Fido there's a list; here there isn't really a way. I think I will open an issue to track that.
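
A minimal sketch of the behaviour under discussion, based on the `DefaultMap` removed from `dkist/net/client.py` below: unknown keys only pass through when the mapping is consulted directly (e.g. via `str.format_map`), while the `**` unpacking used in `download()` copies it into a plain `dict` first, so a missing key raises `KeyError`:

```python
from collections import defaultdict

class DefaultMap(defaultdict):
    """A defaultdict whose default value is the missing key itself."""
    def __missing__(self, key):
        return key

inv = DefaultMap(None, {"datasetId": "YAFMS"})

# A direct lookup consults __missing__, so an unknown key passes through:
"{datasetId}_{somekey}_data".format_map(inv)  # -> 'YAFMS_somekey_data'

# ** unpacking copies the mapping into a plain dict, dropping the
# __missing__ hook, so the same template raises instead:
"{datasetId}_{somekey}_data".format(**inv)    # KeyError: 'somekey'
```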

# TODO: If we are transferring the whole dataset then we should use the
# directory not the list of all the files in it.
@@ -378,7 +385,7 @@ def download(self, path=None, destination_endpoint=None, progress=True, wait=Tru
)

if is_local:
local_destination = destination_path.relative_to("/").expanduser()
local_destination = destination_path.expanduser().relative_to("/")
if local_destination.root == "":
local_destination = "/" / local_destination
self.basepath = local_destination
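
As a sketch of the resolution steps above, assuming a hypothetical humanized inventory (the "Dataset ID" key matches the tests below; the values are made up):

```python
from pathlib import Path

# Hypothetical output of humanize_inventory() for a dataset:
human_inv = {"Dataset ID": "test_dataset", "Primary Proposal ID": "pm_1_10"}

path = "~/{Dataset ID}"
destination_path = Path(Path(path).as_posix().format(**human_inv))
print(destination_path)   # ~/test_dataset

# For a local transfer the path is then expanded and re-anchored at /,
# mirroring the is_local branch above:
local_destination = destination_path.expanduser().relative_to("/")
if local_destination.root == "":
    local_destination = "/" / local_destination
print(local_destination)  # e.g. /home/user/test_dataset
```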
80 changes: 80 additions & 0 deletions dkist/io/tests/test_file_manager.py
@@ -7,6 +7,7 @@

import asdf

from dkist import net
from dkist.data.test import rootdir
from dkist.io.file_manager import FileManager, StripedExternalArray, StripedExternalArrayView

@@ -161,3 +162,82 @@ def test_reprs(file_manager):
assert str(len(sliced_sea)) in repr(sliced_sea)
assert str(sliced_sea.shape) in repr(sliced_sea)
assert str(sea) in repr(sliced_sea)


@pytest.fixture
def orchestrate_transfer_mock(mocker):
yield mocker.patch("dkist.io.file_manager._orchestrate_transfer_task",
autospec=True)


def test_download_default_keywords(dataset, orchestrate_transfer_mock):
base_path = Path(net.conf.dataset_path.format(**dataset.meta["inventory"]))
folder = Path("/{bucket}/{primaryProposalId}/{datasetId}/".format(**dataset.meta["inventory"]))
file_list = dataset.files.filenames + [folder / "test_dataset.asdf",
folder / "test_dataset.mp4",
folder / "test_dataset.pdf"]
file_list = [base_path / fn for fn in file_list]

dataset.files.download()

orchestrate_transfer_mock.assert_called_once_with(
file_list,
recursive=False,
destination_path=Path('/~'),
destination_endpoint=None,
progress=True,
wait=True,
)


@pytest.mark.parametrize("keywords", [
{"progress": True, "wait": True, "destination_endpoint": None},
{"progress": True, "wait": False, "destination_endpoint": None},
{"progress": False, "wait": True, "destination_endpoint": None},
{"progress": False, "wait": True, "destination_endpoint": "wibble"},
])
def test_download_keywords(dataset, orchestrate_transfer_mock, keywords):
"""
Assert that keywords are passed through as expected
"""
base_path = Path(net.conf.dataset_path.format(**dataset.meta["inventory"]))

folder = Path("/{bucket}/{primaryProposalId}/{datasetId}/".format(**dataset.meta["inventory"]))
file_list = dataset.files.filenames + [folder / "test_dataset.asdf",
folder / "test_dataset.mp4",
folder / "test_dataset.pdf"]
file_list = [base_path / fn for fn in file_list]

dataset.files.download(path="/test/", **keywords)

orchestrate_transfer_mock.assert_called_once_with(
file_list,
recursive=False,
destination_path=Path('/test'),
**keywords
)

if not keywords["destination_endpoint"]:
assert dataset.files.basepath == Path("/test/")


def test_download_path_interpolation(dataset, orchestrate_transfer_mock):
base_path = Path(net.conf.dataset_path.format(**dataset.meta["inventory"]))
folder = Path("/{bucket}/{primaryProposalId}/{datasetId}/".format(**dataset.meta["inventory"]))
file_list = dataset.files.filenames + [folder / "test_dataset.asdf",
folder / "test_dataset.mp4",
folder / "test_dataset.pdf"]
file_list = [base_path / fn for fn in file_list]

dataset.files.download(path="~/{Dataset ID}")

orchestrate_transfer_mock.assert_called_once_with(
file_list,
recursive=False,
destination_path=Path('~/test_dataset/'),
destination_endpoint=None,
progress=True,
wait=True,
)

assert dataset.files.basepath == Path("~/test_dataset").expanduser()
56 changes: 3 additions & 53 deletions dkist/net/client.py
@@ -18,20 +18,14 @@
from sunpy.net.base_client import (BaseClient, QueryResponseRow,
QueryResponseTable, convert_row_to_table)

from dkist.utils.inventory import INVENTORY_KEY_MAP

from . import attrs as dattrs
from .attr_walker import walker

__all__ = ["DKISTQueryResponseTable", "DKISTClient"]


class DefaultMap(defaultdict):
"""
A tweak of default dict where the default value is the key that's missing.
"""
def __missing__(self, key):
return key


class DKISTQueryResponseTable(QueryResponseTable):
"""
Results of a DKIST Dataset search.
@@ -48,50 +42,6 @@ class DKISTQueryResponseTable(QueryResponseTable):
# These keys are shown in the repr and str representations of this class.
_core_keys = TableAttribute(default=["Start Time", "End Time", "Instrument", "Wavelength"])

# Map the keys in the response to human friendly ones.
key_map: Mapping[str, str] = DefaultMap(None, {
"asdfObjectKey": "asdf Filename",
"boundingBox": "Bounding Box",
"browseMovieObjectKey": "Movie Filename",
"browseMovieUrl": "Preview URL",
"bucket": "Storage Bucket",
"contributingExperimentIds": "Experiment IDs",
"contributingProposalIds": "Proposal IDs",
"createDate": "Creation Date",
"datasetId": "Dataset ID",
"datasetSize": "Dataset Size",
"embargoEndDate": "Embargo End Date",
"endTime": "End Time",
"experimentDescription": "Experiment Description",
"exposureTime": "Exposure Time",
"filterWavelengths": "Filter Wavelengths",
"frameCount": "Number of Frames",
"hasAllStokes": "Full Stokes",
"instrumentName": "Instrument",
"isDownloadable": "Downloadable",
"isEmbargoed": "Embargoed",
"observables": "Observables",
"originalFrameCount": "Level 0 Frame count",
"primaryExperimentId": "Primary Experiment ID",
"primaryProposalId": "Primary Proposal ID",
"qualityAverageFriedParameter": "Average Fried Parameter",
"qualityAveragePolarimetricAccuracy": "Average Polarimetric Accuracy",
"recipeId": "Recipe ID",
"recipeInstanceId": "Recipie Instance ID",
"recipeRunId": "Recipie Run ID",
"startTime": "Start Time",
"stokesParameters": "Stokes Parameters",
"targetTypes": "Target Types",
"updateDate": "Last Updated",
"wavelengthMax": "Wavelength Max",
"wavelengthMin": "Wavelength Min",
"hasSpectralAxis": "Has Spectral Axis",
"hasTemporalAxis": "Has Temporal Axis",
"averageDatasetSpectralSampling": "Average Spectral Sampling",
"averageDatasetSpatialSampling": "Average Spatial Sampling",
"averageDatasetTemporalSampling": "Average Temporal Sampling",
"qualityReportObjectKey": "Quality Report Filename",
})

@staticmethod
def _process_table(results: "DKISTQueryResponseTable") -> "DKISTQueryResponseTable":
@@ -126,7 +76,7 @@ def from_results(cls, results: Iterable[Mapping[str, Any]], *, client: "DKISTCli
new_results = defaultdict(list)
for result in results:
for key, value in result.items():
new_results[cls.key_map[key]].append(value)
new_results[INVENTORY_KEY_MAP[key]].append(value)

data = cls._process_table(cls(new_results, client=client))
data = data._reorder_columns(cls._core_keys.default, remove_empty=True)
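
For illustration, the `from_results` loop above now translates API keys like this (the sample keys and their humanized forms are taken from the deleted `key_map`; it is assumed `INVENTORY_KEY_MAP` preserves those entries):

```python
from collections import defaultdict

from dkist.utils.inventory import INVENTORY_KEY_MAP

results = [{"datasetId": "YAFMS", "instrumentName": "VBI"}]

# Collect column values under human-friendly names, as from_results does:
new_results = defaultdict(list)
for result in results:
    for key, value in result.items():
        new_results[INVENTORY_KEY_MAP[key]].append(value)

# new_results == {'Dataset ID': ['YAFMS'], 'Instrument': ['VBI']}
```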
7 changes: 4 additions & 3 deletions dkist/net/tests/test_client.py
@@ -13,6 +13,7 @@
import dkist.net
from dkist.net.client import DKISTClient, DKISTQueryResponseTable
from dkist.net.tests import strategies as dst # noqa
from dkist.utils.inventory import INVENTORY_KEY_MAP


@pytest.fixture
@@ -81,7 +82,7 @@ def example_api_response():

@pytest.fixture
def expected_table_keys():
translated_keys = set(DKISTQueryResponseTable.key_map.values())
translated_keys = set(INVENTORY_KEY_MAP.values())
removed_keys = {'Wavelength Min', 'Wavelength Max'}
added_keys = {'Wavelength'}
expected_keys = translated_keys - removed_keys
@@ -110,7 +111,7 @@ def test_query_response_from_results(empty_query_response, example_api_response,
assert qr.client is dclient
assert isinstance(qr[0], QueryResponseRow)
assert not set(qr.colnames).difference(expected_table_keys)
assert set(qr.colnames).isdisjoint(DKISTQueryResponseTable.key_map.keys())
assert set(qr.colnames).isdisjoint(INVENTORY_KEY_MAP.keys())


def test_query_response_from_results_unknown_field(empty_query_response, example_api_response, expected_table_keys):
@@ -127,7 +128,7 @@ def test_query_response_from_results_unknown_field(empty_query_response, example
assert qr.client is dclient
assert isinstance(qr[0], QueryResponseRow)
assert set(qr.colnames).difference(expected_table_keys) == {'spamEggs'}
assert set(qr.colnames).isdisjoint(DKISTQueryResponseTable.key_map.keys())
assert set(qr.colnames).isdisjoint(INVENTORY_KEY_MAP.keys())


def test_length_0_qr(empty_query_response):