-
Notifications
You must be signed in to change notification settings - Fork 47
Download profiles from blob storage #419
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
42dd4a5
6e019fe
aa8c034
0c3ea4d
55ad3a3
32491bd
1f9291f
d739089
505f53d
27123e5
41cce24
4d5abcc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
| import paramiko | ||
| from tqdm import tqdm | ||
|
|
||
| from powersimdata.data_access.profile_helper import ProfileHelper | ||
| from powersimdata.utility import server_setup | ||
| from powersimdata.utility.helpers import CommandBuilder | ||
|
|
||
|
|
@@ -115,6 +116,15 @@ def push(self, file_name, checksum): | |
| """ | ||
| raise NotImplementedError | ||
|
|
||
def get_profile_version(self, grid_model, kind):
    """List the raw profile versions available on blob storage.

    :param str grid_model: grid model.
    :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*.
    :return: (*list*) -- available profile version.
    """
    cloud_versions = ProfileHelper.get_profile_version_cloud(grid_model, kind)
    return cloud_versions
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We implicitly return the versions stored on the cloud. I see how it is useful, but is that intuitive in the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was thinking that since blob storage is the source of truth, it makes sense as the default. Another way would be to
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are right. It makes sense that it is the default. |
||
|
|
||
def close(self):
    """Perform any necessary cleanup for the object.

    This implementation has nothing to release and does nothing.
    """
    return None
|
|
@@ -191,6 +201,17 @@ def wrap(s): | |
| ) | ||
| return wrap(None), wrap(proc.stdout), wrap(proc.stderr) | ||
|
|
||
def get_profile_version(self, grid_model, kind):
    """Returns available raw profile from blob storage or local disk

    A version present in both places is reported once. Blob storage
    versions come first, followed by the versions that are only listed
    locally, so the order of the result is the same on every call.

    :param str grid_model: grid model.
    :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*.
    :return: (*list*) -- available profile version.
    """
    blob_version = super().get_profile_version(grid_model, kind)
    local_version = ProfileHelper.get_profile_version_local(grid_model, kind)
    # dict.fromkeys drops duplicates while keeping first-seen order; going
    # through a set would hand the versions back in an arbitrary order.
    return list(dict.fromkeys(blob_version + local_version))
|
|
||
|
|
||
| class SSHDataAccess(DataAccess): | ||
| """Interface to a remote data store, accessed via SSH.""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| import json | ||
| import os | ||
|
|
||
| import requests | ||
| from tqdm.auto import tqdm | ||
|
|
||
| from powersimdata.utility import server_setup | ||
|
|
||
|
|
||
class ProfileHelper:
    """Helpers to locate, download and list raw profiles.

    Profiles are fetched from the blob storage container at ``BASE_URL``
    and written below ``server_setup.LOCAL_DIR``.
    """

    BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles"

    @staticmethod
    def get_file_components(scenario_info, field_name):
        """Get the file name and relative path for the given profile and
        scenario.

        :param dict scenario_info: a ScenarioInfo instance
        :param str field_name: the kind of profile
        :return: (*tuple*) -- file name and path
        """
        version = scenario_info["base_" + field_name]
        file_name = f"{field_name}_{version}.csv"
        from_dir = os.path.join("raw", scenario_info["grid_model"])
        return file_name, from_dir

    @staticmethod
    def download_file(file_name, from_dir):
        """Download the profile from blob storage at the given path

        :param str file_name: profile csv
        :param str from_dir: the path relative to the blob container
        :return: (*str*) -- path to downloaded file
        :raises requests.HTTPError: if blob storage answers with an error
            status; nothing is written to the destination file in that case.
        """
        print(f"--> Downloading {file_name} from blob storage.")
        # URLs always use forward slashes, whatever the local path separator
        # is and however many components the relative path has.
        url_path = from_dir.replace(os.sep, "/").strip("/")
        url = f"{ProfileHelper.BASE_URL}/{url_path}/{file_name}"
        dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name)
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        # The context manager releases the streamed connection even if the
        # download is interrupted.
        with requests.get(url, stream=True) as resp:
            # Fail before opening the file so an error page is never saved
            # as if it were a profile.
            resp.raise_for_status()
            content_length = int(resp.headers.get("content-length", 0))
            with open(dest, "wb") as f, tqdm(
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                miniters=1,
                total=content_length,
            ) as pbar:
                for chunk in resp.iter_content(chunk_size=4096):
                    f.write(chunk)
                    pbar.update(len(chunk))

        print("--> Done!")
        return dest

    @staticmethod
    def parse_version(grid_model, kind, version):
        """Parse available versions from the given spec

        :param str grid_model: grid model.
        :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*.
        :param dict version: version information per grid model
        :return: (*list*) -- available profile version.
        """
        if grid_model in version and kind in version[grid_model]:
            return version[grid_model][kind]
        print(f"No {kind} profiles available.")
        return []

    @staticmethod
    def get_profile_version_cloud(grid_model, kind):
        """Returns available raw profile from blob storage

        :param str grid_model: grid model.
        :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*.
        :return: (*list*) -- available profile version.
        :raises requests.HTTPError: if the version file cannot be fetched.
        """
        resp = requests.get(f"{ProfileHelper.BASE_URL}/version.json")
        # Report an HTTP failure as such rather than as a JSON parsing error.
        resp.raise_for_status()
        return ProfileHelper.parse_version(grid_model, kind, resp.json())

    @staticmethod
    def get_profile_version_local(grid_model, kind):
        """Returns available raw profile from local file

        :param str grid_model: grid model.
        :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*.
        :return: (*list*) -- available profile version. Empty when there is
            no local version file.
        """
        # NOTE(review): listing the profile directory and matching
        # {kind}_{version}.csv was discussed as an alternative. Reusing the
        # JSON format was kept for now: it is less code and gives users one
        # way to customize the available versions.
        version_file = os.path.join(server_setup.LOCAL_DIR, "version.json")
        try:
            with open(version_file, encoding="utf-8") as f:
                version = json.load(f)
        except FileNotFoundError:
            # No local version file simply means no local versions.
            return []
        return ProfileHelper.parse_version(grid_model, kind, version)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| from powersimdata.data_access.profile_helper import ProfileHelper | ||
|
|
||
|
|
||
def test_parse_version_default():
    """An empty version spec yields no versions."""
    found = ProfileHelper.parse_version("usa_tamu", "solar", {})
    assert found == []
|
|
||
|
|
||
def test_parse_version_missing_key():
    """A spec that lacks the grid model level yields no versions."""
    spec_without_grid_model = {"solar": ["v123"]}
    found = ProfileHelper.parse_version(
        "usa_tamu", "solar", spec_without_grid_model
    )
    assert found == []
|
|
||
|
|
||
def test_parse_version():
    """Listed kinds return their versions; unlisted kinds return nothing."""
    solar_versions = ["v123", "v456"]
    spec = {"usa_tamu": {"solar": solar_versions}}

    found_solar = ProfileHelper.parse_version("usa_tamu", "solar", spec)
    found_hydro = ProfileHelper.parse_version("usa_tamu", "hydro", spec)

    assert found_solar == solar_versions
    assert found_hydro == []
|
|
||
|
|
||
def test_get_file_components():
    """The file name and the relative directory derive from the scenario."""
    import os

    s_info = {"base_wind": "v8", "grid_model": "europe"}
    file_name, from_dir = ProfileHelper.get_file_components(s_info, "wind")
    assert "wind_v8.csv" == file_name
    # The directory is built with os.path.join, so compare against the
    # platform's own separator rather than a hard-coded "/".
    assert os.path.join("raw", "europe") == from_dir
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| import pytest | ||
|
|
||
| from powersimdata.input.input_data import InputHelper, _check_field | ||
|
|
||
|
|
||
def test_get_file_components():
    """Scenario id and field determine the input file name and directory."""
    scenario = {"id": "123"}

    ct_name, _ = InputHelper.get_file_components(scenario, "ct")
    grid_name, grid_dir = InputHelper.get_file_components(scenario, "grid")

    assert ct_name == "123_ct.pkl"
    assert grid_name == "123_grid.mat"
    assert grid_dir == "data/input"
|
|
||
|
|
||
def test_check_field():
    """Valid fields pass silently; every invalid field raises ValueError."""
    _check_field("demand")
    _check_field("hydro")
    # One pytest.raises block per call: a shared block is left at the first
    # exception, so any later call inside it would never run.
    # NOTE(review): assumes "coal" is meant to be rejected like "foo", as it
    # was placed under pytest.raises -- confirm against _check_field.
    for bad_field in ("foo", "coal"):
        with pytest.raises(ValueError):
            _check_field(bad_field)
Uh oh!
There was an error while loading. Please reload this page.