From 42dd4a5e2aa83bc31e33793434d6c5e45d207162 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Tue, 16 Mar 2021 16:19:49 -0700 Subject: [PATCH 01/12] feat: get profile versions from blob storage --- powersimdata/input/input_data.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 94007d0c5..65b2a40b7 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -2,6 +2,7 @@ import posixpath import pandas as pd +import requests from powersimdata.data_access.context import Context from powersimdata.utility import server_setup @@ -11,6 +12,8 @@ profile_kind = {"demand", "hydro", "solar", "wind"} +BLOB_STORAGE = "https://bescienceswebsite.blob.core.windows.net/profiles" + class InputData(object): """Load input data. @@ -85,7 +88,7 @@ def get_data(self, scenario_info, field_name): return data def get_profile_version(self, grid_model, kind): - """Returns available raw profile either from server or local directory. + """Returns available raw profile from blob storage :param str grid_model: grid model. :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. @@ -96,20 +99,12 @@ def get_profile_version(self, grid_model, kind): if kind not in profile_kind: raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - query = posixpath.join( - server_setup.DATA_ROOT_DIR, - server_setup.BASE_PROFILE_DIR, - grid_model, - kind + "_*", - ) - stdin, stdout, stderr = self.data_access.execute_command("ls " + query) - if len(stderr.readlines()) != 0: + resp = requests.get(f"{BLOB_STORAGE}/{grid_model}/version.json") + versions = resp.json() + if kind not in versions: print("No %s profiles available." % kind) - version = [] else: - filename = [os.path.basename(line.rstrip()) for line in stdout.readlines()] - version = [f[f.rfind("_") + 1 : -4] for f in filename] - return version + return versions[kind] def _read_data(filepath): From 6e019fed2f620eb92379b09efd2503a9068485e7 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Tue, 16 Mar 2021 17:46:09 -0700 Subject: [PATCH 02/12] feat: download profiles from blob storage --- powersimdata/input/input_data.py | 93 +++++++++++++++++++++----------- powersimdata/scenario/create.py | 2 +- 2 files changed, 64 insertions(+), 31 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 65b2a40b7..d788f08cd 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -1,5 +1,5 @@ import os -import posixpath +import shutil import pandas as pd import requests @@ -15,6 +15,60 @@ BLOB_STORAGE = "https://bescienceswebsite.blob.core.windows.net/profiles" +_file_extension = { + **{"ct": "pkl", "grid": "mat"}, + **{k: "csv" for k in profile_kind}, +} + + +class InputHelper: + def __init__(self, data_access): + self.data_access = data_access + + @staticmethod + def get_file_components(scenario_info, field_name): + ext = _file_extension[field_name] + file_name = scenario_info["id"] + "_" + field_name + "." + ext + from_dir = server_setup.INPUT_DIR + return file_name, from_dir + + def download_file(self, file_name, from_dir): + self.data_access.copy_from(file_name, from_dir) + + +class ProfileHelper: + @staticmethod + def get_file_components(scenario_info, field_name): + ext = _file_extension[field_name] + version = scenario_info["base_" + field_name] + file_name = field_name + "_" + version + "." + ext + from_dir = scenario_info["grid_model"] + return file_name, from_dir + + @staticmethod + def download_file(file_name, from_dir): + url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" + dest = os.path.join(server_setup.LOCAL_DIR, file_name) + with requests.get(url, stream=True) as r: + with open(dest, "wb") as f: + shutil.copyfileobj(r.raw, f) + + return dest + + +def _check_field(field_name): + """Checks field name. + + :param str field_name: *'demand'*, *'hydro'*, *'solar'*, *'wind'*, + *'ct'* or *'grid'*. + :raises ValueError: if not *'demand'*, *'hydro'*, *'solar'*, *'wind'* + *'ct'* or *'grid'* + """ + possible = list(_file_extension.keys()) + if field_name not in possible: + raise ValueError("Only %s data can be loaded" % " | ".join(possible)) + + class InputData(object): """Load input data. @@ -25,25 +79,8 @@ def __init__(self, data_loc=None): """Constructor.""" os.makedirs(server_setup.LOCAL_DIR, exist_ok=True) - self.file_extension = { - **{"ct": "pkl", "grid": "mat"}, - **{k: "csv" for k in profile_kind}, - } - self.data_access = Context.get_data_access(data_loc) - def _check_field(self, field_name): - """Checks field name. - - :param str field_name: *'demand'*, *'hydro'*, *'solar'*, *'wind'*, - *'ct'* or *'grid'*. - :raises ValueError: if not *'demand'*, *'hydro'*, *'solar'*, *'wind'* - *'ct'* or *'grid'* - """ - possible = list(self.file_extension.keys()) - if field_name not in possible: - raise ValueError("Only %s data can be loaded" % " | ".join(possible)) - def get_data(self, scenario_info, field_name): """Returns data either from server or local directory. @@ -55,20 +92,15 @@ def get_data(self, scenario_info, field_name): dictionary, or the path to a matfile enclosing the grid data. :raises FileNotFoundError: if file not found on local machine. """ - self._check_field(field_name) - + _check_field(field_name) print("--> Loading %s" % field_name) - ext = self.file_extension[field_name] if field_name in profile_kind: - version = scenario_info["base_" + field_name] - file_name = field_name + "_" + version + "." + ext - from_dir = posixpath.join( - server_setup.BASE_PROFILE_DIR, scenario_info["grid_model"] - ) + helper = ProfileHelper else: - file_name = scenario_info["id"] + "_" + field_name + "." + ext - from_dir = server_setup.INPUT_DIR + helper = InputHelper(self.data_access) + + file_name, from_dir = helper.get_file_components(scenario_info, field_name) filepath = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) key = cache_key(filepath) @@ -82,12 +114,13 @@ def get_data(self, scenario_info, field_name): "%s not found in %s on local machine" % (file_name, server_setup.LOCAL_DIR) ) - self.data_access.copy_from(file_name, from_dir) + helper.download_file(file_name, from_dir) data = _read_data(filepath) _cache.put(key, data) return data - def get_profile_version(self, grid_model, kind): + @staticmethod + def get_profile_version(grid_model, kind): """Returns available raw profile from blob storage :param str grid_model: grid model. diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 6008aca6b..e8d5b99ed 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -334,7 +334,7 @@ def get_base_profile(self, kind): :param str kind: one of *'demand'*, *'hydro'*, *'solar'*, *'wind'*. :return: (*list*) -- available version for selected profile kind. """ - return InputData().get_profile_version(self.grid_model, kind) + return InputData.get_profile_version(self.grid_model, kind) def set_base_profile(self, kind, version): """Sets demand profile. From aa8c034bed4756c606cf4f738eed371bfdec619d Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Wed, 17 Mar 2021 13:08:46 -0700 Subject: [PATCH 03/12] fix: use consistent paths --- powersimdata/input/input_data.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index d788f08cd..e16dd9db6 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -42,17 +42,21 @@ def get_file_components(scenario_info, field_name): ext = _file_extension[field_name] version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + "." + ext - from_dir = scenario_info["grid_model"] + grid_model = scenario_info["grid_model"] + from_dir = f"{server_setup.BASE_PROFILE_DIR}/{grid_model}" return file_name, from_dir @staticmethod def download_file(file_name, from_dir): + print(f"--> Downloading {file_name} from blob storage.") url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" - dest = os.path.join(server_setup.LOCAL_DIR, file_name) + dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + os.makedirs(os.path.dirname(dest), exist_ok=True) with requests.get(url, stream=True) as r: with open(dest, "wb") as f: shutil.copyfileobj(r.raw, f) + print("--> Done!") return dest @@ -132,7 +136,7 @@ def get_profile_version(grid_model, kind): if kind not in profile_kind: raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - resp = requests.get(f"{BLOB_STORAGE}/{grid_model}/version.json") + resp = requests.get(f"{BLOB_STORAGE}/raw/{grid_model}/version.json") versions = resp.json() if kind not in versions: print("No %s profiles available." % kind) From 0c3ea4d22de85f4fd016c3d99b38a31c76d3213a Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Wed, 17 Mar 2021 13:49:15 -0700 Subject: [PATCH 04/12] feat: progress bar for download --- powersimdata/input/input_data.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index e16dd9db6..04e757264 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -1,8 +1,8 @@ import os -import shutil import pandas as pd import requests +from tqdm.auto import tqdm from powersimdata.data_access.context import Context from powersimdata.utility import server_setup @@ -52,9 +52,19 @@ def download_file(file_name, from_dir): url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) - with requests.get(url, stream=True) as r: - with open(dest, "wb") as f: - shutil.copyfileobj(r.raw, f) + resp = requests.get(url, stream=True) + content_length = int(resp.headers.get("content-length", 0)) + with open(dest, "wb") as f: + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + total=content_length, + ) as pbar: + for chunk in resp.iter_content(chunk_size=4096): + f.write(chunk) + pbar.update(len(chunk)) print("--> Done!") return dest From 55ad3a35f759a1759780e614ba4ca44469c407a2 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Thu, 18 Mar 2021 11:53:30 -0700 Subject: [PATCH 05/12] refactor: use top level version list to simplify paths --- powersimdata/input/input_data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 04e757264..b37ffa5a3 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -12,8 +12,6 @@ profile_kind = {"demand", "hydro", "solar", "wind"} -BLOB_STORAGE = "https://bescienceswebsite.blob.core.windows.net/profiles" - _file_extension = { **{"ct": "pkl", "grid": "mat"}, @@ -21,6 +19,9 @@ } +BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles" + + class InputHelper: def __init__(self, data_access): self.data_access = data_access @@ -49,7 +50,7 @@ def get_file_components(scenario_info, field_name): @staticmethod def download_file(file_name, from_dir): print(f"--> Downloading {file_name} from blob storage.") - url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" + url = f"{BASE_URL}/{from_dir}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) resp = requests.get(url, stream=True) @@ -146,12 +147,11 @@ def get_profile_version(grid_model, kind): if kind not in profile_kind: raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - resp = requests.get(f"{BLOB_STORAGE}/raw/{grid_model}/version.json") - versions = resp.json() - if kind not in versions: - print("No %s profiles available." % kind) - else: - return versions[kind] + resp = requests.get(f"{BASE_URL}/version.json") + version = resp.json() + if grid_model in version and kind in version[grid_model]: + return version[grid_model][kind] + print("No %s profiles available." % kind) def _read_data(filepath): From 32491bd5486f4bf87d41e98b26b7f2333ce63265 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Fri, 19 Mar 2021 11:07:52 -0700 Subject: [PATCH 06/12] chore: remove base profile dir and fix pandas warning --- powersimdata/input/input_data.py | 2 +- powersimdata/input/transform_profile.py | 2 +- powersimdata/scenario/move.py | 17 ----------------- powersimdata/utility/server_setup.py | 4 ---- 4 files changed, 2 insertions(+), 23 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index b37ffa5a3..062c09a3b 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -44,7 +44,7 @@ def get_file_components(scenario_info, field_name): version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + "." + ext grid_model = scenario_info["grid_model"] - from_dir = f"{server_setup.BASE_PROFILE_DIR}/{grid_model}" + from_dir = f"raw/{grid_model}" return file_name, from_dir @staticmethod diff --git a/powersimdata/input/transform_profile.py b/powersimdata/input/transform_profile.py index 69cbf4603..1e7826d89 100644 --- a/powersimdata/input/transform_profile.py +++ b/powersimdata/input/transform_profile.py @@ -103,7 +103,7 @@ def _get_demand_profile(self): :return: (*pandas.DataFrame*) -- data frame of demand. """ zone_id = sorted(self.grid.bus.zone_id.unique()) - demand = self._input_data.get_data(self.scenario_info, "demand")[zone_id] + demand = self._input_data.get_data(self.scenario_info, "demand").loc[:, zone_id] if bool(self.ct) and "demand" in list(self.ct.keys()): for key, value in self.ct["demand"]["zone_id"].items(): print( diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index f93d6afc4..2a3743ef2 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -35,7 +35,6 @@ def move_scenario(self, target="disk"): backup = BackUpDisk(self._data_access, self._scenario_info) backup.move_input_data() - backup.copy_base_profile() backup.move_output_data() backup.move_temporary_folder() @@ -76,22 +75,6 @@ def move_input_data(self): self._data_access.copy(source, target, update=True) self._data_access.remove(source, recursive=True, force=True) - def copy_base_profile(self): - """Copies base profile""" - print("--> Copying base profiles to backup disk") - for kind in ["demand", "hydro", "solar", "wind"]: - src = posixpath.join( - self.server_config.base_profile_dir(), - self._scenario_info["grid_model"], - kind + "_" + self._scenario_info["base_" + kind] + ".csv", - ) - dest = posixpath.join( - self.backup_config.base_profile_dir(), self._scenario_info["grid_model"] - ) - _, stdout, stderr = self._data_access.copy(src, dest, update=True) - print(stdout.readlines()) - print(stderr.readlines()) - def move_output_data(self): """Moves output data""" print("--> Moving scenario output data to backup disk") diff --git a/powersimdata/utility/server_setup.py b/powersimdata/utility/server_setup.py index aeea66cfb..07fdcb759 100644 --- a/powersimdata/utility/server_setup.py +++ b/powersimdata/utility/server_setup.py @@ -7,7 +7,6 @@ BACKUP_DATA_ROOT_DIR = "/mnt/RE-Storage/v2" DATA_ROOT_DIR = "/mnt/bes/pcm" EXECUTE_DIR = "tmp" -BASE_PROFILE_DIR = "raw" INPUT_DIR = "data/input" OUTPUT_DIR = "data/output" LOCAL_DIR = os.path.join(Path.home(), "ScenarioData", "") @@ -36,9 +35,6 @@ def _join(self, rel_path): def execute_dir(self): return self._join(EXECUTE_DIR) - def base_profile_dir(self): - return self._join(BASE_PROFILE_DIR) - def input_dir(self): return self._join(INPUT_DIR) From 1f9291f6ad89d6c56c5d6f56f50575c397cc164a Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Fri, 19 Mar 2021 17:04:37 -0700 Subject: [PATCH 07/12] feat: support custom profiles through local version.json --- powersimdata/data_access/data_access.py | 25 ++++++++ powersimdata/data_access/profile_helper.py | 72 ++++++++++++++++++++++ powersimdata/input/input_data.py | 56 ++--------------- powersimdata/scenario/create.py | 2 +- 4 files changed, 102 insertions(+), 53 deletions(-) create mode 100644 powersimdata/data_access/profile_helper.py diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index a2fc0c02a..3fc82a4a5 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -1,3 +1,4 @@ +import json import operator import os import posixpath @@ -7,6 +8,7 @@ import paramiko from tqdm import tqdm +from powersimdata.data_access.profile_helper import ProfileHelper from powersimdata.utility import server_setup from powersimdata.utility.helpers import CommandBuilder @@ -115,6 +117,9 @@ def push(self, file_name, checksum): """ raise NotImplementedError + def get_profile_version(self, grid_model, kind): + return ProfileHelper.get_profile_version(grid_model, kind) + def close(self): """Perform any necessary cleanup for the object.""" pass @@ -191,6 +196,26 @@ def wrap(s): ) return wrap(None), wrap(proc.stdout), wrap(proc.stderr) + def get_profile_version(self, grid_model, kind): + """Returns available raw profile from blob storage or local disk + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. + """ + blob_versions = super().get_profile_version(grid_model, kind) + version_file = os.path.join(server_setup.LOCAL_DIR, "version.json") + if not os.path.exists(version_file): + return blob_versions + with open(version_file) as f: + version = json.load(f) + return list( + set( + blob_versions + + ProfileHelper.parse_version(grid_model, kind, version) + ) + ) + class SSHDataAccess(DataAccess): """Interface to a remote data store, accessed via SSH.""" diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py new file mode 100644 index 000000000..aa53fe302 --- /dev/null +++ b/powersimdata/data_access/profile_helper.py @@ -0,0 +1,72 @@ +import os + +import requests +from tqdm.auto import tqdm + +from powersimdata.utility import server_setup + + +class ProfileHelper: + BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles" + + @staticmethod + def get_file_components(scenario_info, field_name): + version = scenario_info["base_" + field_name] + file_name = field_name + "_" + version + ".csv" + grid_model = scenario_info["grid_model"] + from_dir = f"raw/{grid_model}" + return file_name, from_dir + + @staticmethod + def download_file(file_name, from_dir): + print(f"--> Downloading {file_name} from blob storage.") + url = f"{ProfileHelper.BASE_URL}/{from_dir}/{file_name}" + dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + os.makedirs(os.path.dirname(dest), exist_ok=True) + resp = requests.get(url, stream=True) + content_length = int(resp.headers.get("content-length", 0)) + with open(dest, "wb") as f: + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + total=content_length, + ) as pbar: + for chunk in resp.iter_content(chunk_size=4096): + f.write(chunk) + pbar.update(len(chunk)) + + print("--> Done!") + return dest + + @staticmethod + def parse_version(grid_model, kind, version): + """Parse available versions from the given spec + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :param dict version: json response + :return: (*list*) -- available profile version. + :raises ValueError: if kind not one of *'demand'*, *'hydro'*, *'solar'* or + *'wind'*. + """ + profile_kind = {"demand", "hydro", "solar", "wind"} + if kind not in profile_kind: + raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) + + if grid_model in version and kind in version[grid_model]: + return version[grid_model][kind] + print("No %s profiles available." % kind) + + @staticmethod + def get_profile_version(grid_model, kind): + """Returns available raw profile from blob storage + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. + """ + + resp = requests.get(f"{ProfileHelper.BASE_URL}/version.json") + return ProfileHelper.parse_version(grid_model, kind, resp.json()) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 062c09a3b..c6a42aa06 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -1,10 +1,9 @@ import os import pandas as pd -import requests -from tqdm.auto import tqdm from powersimdata.data_access.context import Context +from powersimdata.data_access.profile_helper import ProfileHelper from powersimdata.utility import server_setup from powersimdata.utility.helpers import MemoryCache, cache_key @@ -19,9 +18,6 @@ } -BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles" - - class InputHelper: def __init__(self, data_access): self.data_access = data_access @@ -37,40 +33,6 @@ def download_file(self, file_name, from_dir): self.data_access.copy_from(file_name, from_dir) -class ProfileHelper: - @staticmethod - def get_file_components(scenario_info, field_name): - ext = _file_extension[field_name] - version = scenario_info["base_" + field_name] - file_name = field_name + "_" + version + "." + ext - grid_model = scenario_info["grid_model"] - from_dir = f"raw/{grid_model}" - return file_name, from_dir - - @staticmethod - def download_file(file_name, from_dir): - print(f"--> Downloading {file_name} from blob storage.") - url = f"{BASE_URL}/{from_dir}/{file_name}" - dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) - os.makedirs(os.path.dirname(dest), exist_ok=True) - resp = requests.get(url, stream=True) - content_length = int(resp.headers.get("content-length", 0)) - with open(dest, "wb") as f: - with tqdm( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - total=content_length, - ) as pbar: - for chunk in resp.iter_content(chunk_size=4096): - f.write(chunk) - pbar.update(len(chunk)) - - print("--> Done!") - return dest - - def _check_field(field_name): """Checks field name. @@ -134,24 +96,14 @@ def get_data(self, scenario_info, field_name): _cache.put(key, data) return data - @staticmethod - def get_profile_version(grid_model, kind): - """Returns available raw profile from blob storage + def get_profile_version(self, grid_model, kind): + """Returns available raw profile from blob storage or local disk :param str grid_model: grid model. :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. :return: (*list*) -- available profile version. - :raises ValueError: if kind not one of *'demand'*, *'hydro'*, *'solar'* or - *'wind'*. """ - if kind not in profile_kind: - raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - - resp = requests.get(f"{BASE_URL}/version.json") - version = resp.json() - if grid_model in version and kind in version[grid_model]: - return version[grid_model][kind] - print("No %s profiles available." % kind) + return self.data_access.get_profile_version(grid_model, kind) def _read_data(filepath): diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index e8d5b99ed..6008aca6b 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -334,7 +334,7 @@ def get_base_profile(self, kind): :param str kind: one of *'demand'*, *'hydro'*, *'solar'*, *'wind'*. :return: (*list*) -- available version for selected profile kind. """ - return InputData.get_profile_version(self.grid_model, kind) + return InputData().get_profile_version(self.grid_model, kind) def set_base_profile(self, kind, version): """Sets demand profile. From d73908985237b267c8a8f85d264a3bb6d566113c Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Fri, 19 Mar 2021 17:34:53 -0700 Subject: [PATCH 08/12] chore: remove redundant validation --- powersimdata/data_access/profile_helper.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index aa53fe302..d66191386 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -48,13 +48,7 @@ def parse_version(grid_model, kind, version): :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. :param dict version: json response :return: (*list*) -- available profile version. - :raises ValueError: if kind not one of *'demand'*, *'hydro'*, *'solar'* or - *'wind'*. """ - profile_kind = {"demand", "hydro", "solar", "wind"} - if kind not in profile_kind: - raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - if grid_model in version and kind in version[grid_model]: return version[grid_model][kind] print("No %s profiles available." % kind) From 505f53dd040a5743dd1cbcff307ce0cb766dac12 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Mon, 22 Mar 2021 12:17:33 -0700 Subject: [PATCH 09/12] test: add unit tests and move some logic around --- powersimdata/data_access/data_access.py | 16 +++---------- powersimdata/data_access/profile_helper.py | 20 +++++++++++++++- .../data_access/tests/test_profile_helper.py | 24 +++++++++++++++++++ powersimdata/input/tests/test_input_data.py | 20 ++++++++++++++++ 4 files changed, 66 insertions(+), 14 deletions(-) create mode 100644 powersimdata/data_access/tests/test_profile_helper.py create mode 100644 powersimdata/input/tests/test_input_data.py diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 3fc82a4a5..9ff6bec80 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -1,4 +1,3 @@ -import json import operator import os import posixpath @@ -203,18 +202,9 @@ def get_profile_version(self, grid_model, kind): :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. :return: (*list*) -- available profile version. """ - blob_versions = super().get_profile_version(grid_model, kind) - version_file = os.path.join(server_setup.LOCAL_DIR, "version.json") - if not os.path.exists(version_file): - return blob_versions - with open(version_file) as f: - version = json.load(f) - return list( - set( - blob_versions - + ProfileHelper.parse_version(grid_model, kind, version) - ) - ) + blob_version = super().get_profile_version(grid_model, kind) + local_version = ProfileHelper.get_profile_version_local(grid_model, kind) + return list(set(blob_version + local_version)) class SSHDataAccess(DataAccess): diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index d66191386..d3f689db0 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -1,3 +1,4 @@ +import json import os import requests @@ -46,12 +47,13 @@ def parse_version(grid_model, kind, version): :param str grid_model: grid model. :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. - :param dict version: json response + :param dict version: version information per grid model :return: (*list*) -- available profile version. """ if grid_model in version and kind in version[grid_model]: return version[grid_model][kind] print("No %s profiles available." % kind) + return [] @staticmethod def get_profile_version(grid_model, kind): @@ -64,3 +66,19 @@ def get_profile_version(grid_model, kind): resp = requests.get(f"{ProfileHelper.BASE_URL}/version.json") return ProfileHelper.parse_version(grid_model, kind, resp.json()) + + @staticmethod + def get_profile_version_local(grid_model, kind): + """Returns available raw profile from local file + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. + """ + + version_file = os.path.join(server_setup.LOCAL_DIR, "version.json") + if not os.path.exists(version_file): + return [] + with open(version_file) as f: + version = json.load(f) + return ProfileHelper.parse_version(grid_model, kind, version) diff --git a/powersimdata/data_access/tests/test_profile_helper.py b/powersimdata/data_access/tests/test_profile_helper.py new file mode 100644 index 000000000..03423a525 --- /dev/null +++ b/powersimdata/data_access/tests/test_profile_helper.py @@ -0,0 +1,24 @@ +from powersimdata.data_access.profile_helper import ProfileHelper + + +def test_parse_version_default(): + assert [] == ProfileHelper.parse_version("usa_tamu", "solar", {}) + + +def test_parse_version_missing_key(): + version = {"solar": ["v123"]} + assert [] == ProfileHelper.parse_version("usa_tamu", "solar", version) + + +def test_parse_version(): + expected = ["v123", "v456"] + version = {"usa_tamu": {"solar": expected}} + assert expected == ProfileHelper.parse_version("usa_tamu", "solar", version) + assert [] == ProfileHelper.parse_version("usa_tamu", "hydro", version) + + +def test_get_file_components(): + s_info = {"base_wind": "v8", "grid_model": "europe"} + file_name, from_dir = ProfileHelper.get_file_components(s_info, "wind") + assert "wind_v8.csv" == file_name + assert "raw/europe" == from_dir diff --git a/powersimdata/input/tests/test_input_data.py b/powersimdata/input/tests/test_input_data.py new file mode 100644 index 000000000..0e37b3b20 --- /dev/null +++ b/powersimdata/input/tests/test_input_data.py @@ -0,0 +1,20 @@ +import pytest + +from powersimdata.input.input_data import InputHelper, _check_field + + +def test_get_file_components(): + s_info = {"id": "123"} + ct_file, _ = InputHelper.get_file_components(s_info, "ct") + grid_file, from_dir = InputHelper.get_file_components(s_info, "grid") + assert "123_ct.pkl" == ct_file + assert "123_grid.mat" == grid_file + assert "data/input" == from_dir + + +def test_check_field(): + _check_field("demand") + _check_field("hydro") + with pytest.raises(ValueError): + _check_field("foo") + _check_field("coal") From 27123e56023162c6acf589690c215f4004c19585 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Mon, 22 Mar 2021 14:28:01 -0700 Subject: [PATCH 10/12] chore: more specific method name --- powersimdata/data_access/data_access.py | 2 +- powersimdata/data_access/profile_helper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 9ff6bec80..4b5d8b15a 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -117,7 +117,7 @@ def push(self, file_name, checksum): raise NotImplementedError def get_profile_version(self, grid_model, kind): - return ProfileHelper.get_profile_version(grid_model, kind) + return ProfileHelper.get_profile_version_cloud(grid_model, kind) def close(self): """Perform any necessary cleanup for the object.""" diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index d3f689db0..808fc96ca 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -56,7 +56,7 @@ def parse_version(grid_model, kind, version): return [] @staticmethod - def get_profile_version(grid_model, kind): + def get_profile_version_cloud(grid_model, kind): """Returns available raw profile from blob storage :param str grid_model: grid model. From 41cce2429ed21716ec8e7e0aa768a4c617a4ffa8 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Mon, 22 Mar 2021 15:53:02 -0700 Subject: [PATCH 11/12] docs: add missing docstrings --- powersimdata/data_access/data_access.py | 6 ++++++ powersimdata/data_access/profile_helper.py | 13 +++++++++++++ powersimdata/input/input_data.py | 11 +++++++++++ 3 files changed, 30 insertions(+) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 4b5d8b15a..879dc5af5 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -117,6 +117,12 @@ def push(self, file_name, checksum): raise NotImplementedError def get_profile_version(self, grid_model, kind): + """Returns available raw profile from blob storage + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. + """ return ProfileHelper.get_profile_version_cloud(grid_model, kind) def close(self): diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index 808fc96ca..47ed30f08 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -12,6 +12,13 @@ class ProfileHelper: @staticmethod def get_file_components(scenario_info, field_name): + """Get the file name and relative path for the given profile and + scenario. + + :param dict scenario_info: a ScenarioInfo instance + :param str field_name: the kind of profile + :return: (*tuple*) -- file name and path + """ version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + ".csv" grid_model = scenario_info["grid_model"] @@ -20,6 +27,12 @@ def get_file_components(scenario_info, field_name): @staticmethod def download_file(file_name, from_dir): + """Download the profile from blob storage at the given path + + :param str file_name: profile csv + :param str from_dir: the path relative to the blob container + :return: (*str*) -- path to downloaded file + """ print(f"--> Downloading {file_name} from blob storage.") url = f"{ProfileHelper.BASE_URL}/{from_dir}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index c6a42aa06..c16d0876a 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -24,12 +24,23 @@ def __init__(self, data_access): @staticmethod def get_file_components(scenario_info, field_name): + """Get the file name and relative path for either ct or grid + + :param dict scenario_info: a ScenarioInfo instance + :param str field_name: the input file type + :return: (*tuple*) -- file name and path + """ ext = _file_extension[field_name] file_name = scenario_info["id"] + "_" + field_name + "." + ext from_dir = server_setup.INPUT_DIR return file_name, from_dir def download_file(self, file_name, from_dir): + """Download the file if using server, otherwise no-op + + :param str file_name: either grid or ct file name + :param str from_dir: the path relative to the root dir + """ self.data_access.copy_from(file_name, from_dir) From 4d5abcc80f9c4fa86e23ce0f6587b4dac9488c18 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Mon, 22 Mar 2021 17:47:37 -0700 Subject: [PATCH 12/12] fix: create local path correctly --- powersimdata/data_access/profile_helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index 47ed30f08..6b6d14567 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -22,7 +22,7 @@ def get_file_components(scenario_info, field_name): version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + ".csv" grid_model = scenario_info["grid_model"] - from_dir = f"raw/{grid_model}" + from_dir = os.path.join("raw", grid_model) return file_name, from_dir @staticmethod @@ -34,7 +34,8 @@ def download_file(file_name, from_dir): :return: (*str*) -- path to downloaded file """ print(f"--> Downloading {file_name} from blob storage.") - url = f"{ProfileHelper.BASE_URL}/{from_dir}/{file_name}" + url_path = "/".join(os.path.split(from_dir)) + url = f"{ProfileHelper.BASE_URL}/{url_path}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) resp = requests.get(url, stream=True)