Skip to content

Commit

Permalink
Merge pull request #26 from NREL/multi_year
Browse files Browse the repository at this point in the history
Multi year subsetting
  • Loading branch information
MRossol committed Aug 17, 2020
2 parents 52344ff + eb47076 commit ca598da
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 37 deletions.
94 changes: 77 additions & 17 deletions rex/multi_year_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class MultiYearH5:
Class to handle multiple years of h5 Resources
"""

def __init__(self, h5_dir, prefix='', suffix='.h5', res_cls=Resource,
hsds=False, **res_cls_kwargs):
def __init__(self, h5_dir, prefix='', suffix='.h5', years=None,
res_cls=Resource, hsds=False, **res_cls_kwargs):
"""
Parameters
----------
Expand All @@ -30,6 +30,8 @@ def __init__(self, h5_dir, prefix='', suffix='.h5', res_cls=Resource,
Prefix for resource .h5 files
suffix : str
Suffix for resource .h5 files
years : list, optional
List of years to access, by default None
res_cls : obj
Resource class to use to open and access resource data
hsds : bool
Expand All @@ -38,7 +40,8 @@ def __init__(self, h5_dir, prefix='', suffix='.h5', res_cls=Resource,
"""
self.h5_dir = h5_dir
self._year_map = self._map_file_years(h5_dir, prefix=prefix,
suffix=suffix, hsds=hsds)
suffix=suffix, hsds=hsds,
years=years)
res_cls_kwargs.update({'hsds': hsds})
self._h5_map = self._map_file_instances(set(self._year_map.values()),
res_cls=res_cls,
Expand Down Expand Up @@ -130,8 +133,7 @@ def h5(self):
Returns
-------
[type]
[description]
h5py.File
"""
return self._h5_map[self.h5_files[0]]

Expand Down Expand Up @@ -264,7 +266,43 @@ def _map_hsds_files(hsds_dir, prefix='', suffix='.h5'):
return year_map

@staticmethod
def _map_file_years(h5_dir, prefix='', suffix='.h5', hsds=False):
def _get_years(year_map, years):
"""
[summary]
Parameters
----------
year_map : dict
Dictionary mapping years to file paths
years : list
List of years of interest. Should be a subset of years in year_map
Returns
-------
new_map : dict
Dictionary mapping requested years to file paths
"""
new_map = {}
for year in years:
if not isinstance(year, int):
year = int(year)

if year in year_map:
new_map[year] = year_map[year]
else:
msg = ('A file for {} is unavailable!'.format(year))
warn(msg, ResourceWarning)

if not new_map:
msg = ('No files were found for the given years:\n{}'
.format(years))
raise RuntimeError(msg)

return new_map

@staticmethod
def _map_file_years(h5_dir, prefix='', suffix='.h5', hsds=False,
years=None):
"""
Map file paths to the year for which each file contains data
Expand All @@ -276,6 +314,11 @@ def _map_file_years(h5_dir, prefix='', suffix='.h5', hsds=False):
Prefix for resource .h5 files
suffix : str
Suffix for resource .h5 files
hsds : bool
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
behind HSDS
years : list, optional
List of years to access, by default None
Returns
-------
Expand All @@ -289,6 +332,9 @@ def _map_file_years(h5_dir, prefix='', suffix='.h5', hsds=False):
year_map = MultiYearH5._map_local_files(h5_dir, prefix=prefix,
suffix=suffix)

if years is not None:
year_map = MultiYearH5._get_years(year_map, years)

return year_map

@staticmethod
Expand Down Expand Up @@ -423,8 +469,8 @@ class MultiYearResource:
PREFIX = ''
SUFFIX = '.h5'

def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
res_cls=Resource):
def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
hsds=False, res_cls=Resource):
"""
Parameters
----------
Expand All @@ -433,6 +479,8 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
Available formats:
/h5_dir/
/h5_dir/prefix*suffix
years : list, optional
List of years to access, by default None
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand All @@ -458,7 +506,7 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
cls_kwargs = {'unscale': unscale, 'str_decode': str_decode,
'hsds': hsds}
self._h5 = MultiYearH5(self.h5_dir, prefix=prefix, suffix=suffix,
res_cls=res_cls, **cls_kwargs)
years=years, res_cls=res_cls, **cls_kwargs)
self.h5_files = self._h5.h5_files
self.h5_file = self.h5_files[0]
self._i = 0
Expand Down Expand Up @@ -908,7 +956,8 @@ class MultiYearSolarResource:
Class to handle multiple years of solar resource data stored across
multiple .h5 files
"""
def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
hsds=False):
"""
Parameters
----------
Expand All @@ -917,6 +966,8 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Available formats:
/h5_dir/
/h5_dir/prefix*suffix
years : list, optional
List of years to access, by default None
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand All @@ -926,7 +977,7 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
behind HSDS
"""
super().__init__(h5_path, unscale=unscale, hsds=hsds,
super().__init__(h5_path, years=years, unscale=unscale, hsds=hsds,
str_decode=str_decode, res_cls=SolarResource)


Expand All @@ -937,7 +988,8 @@ class MultiYearNSRDB(MultiYearResource):
"""
PREFIX = 'nsrdb'

def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
hsds=False):
"""
Parameters
----------
Expand All @@ -946,6 +998,8 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Available formats:
/h5_dir/
/h5_dir/prefix*suffix
years : list, optional
List of years to access, by default None
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand All @@ -955,7 +1009,7 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
behind HSDS
"""
super().__init__(h5_path, unscale=unscale, hsds=hsds,
super().__init__(h5_path, years=years, unscale=unscale, hsds=hsds,
str_decode=str_decode, res_cls=NSRDB)


Expand All @@ -966,7 +1020,8 @@ class MultiYearWindResource(MultiYearResource):
"""
PREFIX = 'wtk'

def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
hsds=False):
"""
Parameters
----------
Expand All @@ -975,6 +1030,8 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Available formats:
/h5_dir/
/h5_dir/prefix*suffix
years : list, optional
List of years to access, by default None
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand All @@ -984,7 +1041,7 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
behind HSDS
"""
super().__init__(h5_path, unscale=unscale, hsds=hsds,
super().__init__(h5_path, years=years, unscale=unscale, hsds=hsds,
str_decode=str_decode, res_cls=WindResource)


Expand All @@ -994,7 +1051,8 @@ class MultiYearWaveResource:
multiple .h5 files
"""

def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
hsds=False):
"""
Parameters
----------
Expand All @@ -1003,6 +1061,8 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Available formats:
/h5_dir/
/h5_dir/prefix*suffix
years : list, optional
List of years to access, by default None
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand All @@ -1012,5 +1072,5 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False):
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
behind HSDS
"""
super().__init__(h5_path, unscale=unscale, hsds=hsds,
super().__init__(h5_path, years=years, unscale=unscale, hsds=hsds,
str_decode=str_decode, res_cls=WaveResource)
11 changes: 8 additions & 3 deletions rex/resource_extraction/multi_year_resource_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
MultiYearNSRDBX,
MultiYearWindX,
MultiYearWaveX)
from rex.utilities.cli_dtypes import STRLIST
from rex.utilities.cli_dtypes import STRLIST, INTLIST
from rex.utilities.loggers import init_mult

logger = logging.getLogger(__name__)
Expand All @@ -26,6 +26,11 @@
help=('Path to Resource .h5 files'))
@click.option('--out_dir', '-o', required=True, type=click.Path(),
help='Directory to dump output files')
@click.option('--years', '-yrs', type=INTLIST, default=None,
help='List of years to access, by default None')
@click.option('--hsds', '-hsds', is_flag=True,
help=("Boolean flag to use h5pyd to handle .h5 'files' hosted "
"on AWS behind HSDS"))
@click.option('--res_cls', '-res',
type=click.Choice(['Resource', 'NSRDB', 'Wind', 'Wave'],
case_sensitive=False),
Expand All @@ -34,14 +39,14 @@
@click.option('-v', '--verbose', is_flag=True,
help='Flag to turn on debug logging. Default is not verbose.')
@click.pass_context
def main(ctx, resource_path, res_cls, out_dir, verbose):
def main(ctx, resource_path, out_dir, years, hsds, res_cls, verbose):
"""
ResourceX Command Line Interface
"""
ctx.ensure_object(dict)
ctx.obj['H5'] = resource_path
ctx.obj['OUT_DIR'] = out_dir
ctx.obj['CLS_KWARGS'] = {}
ctx.obj['CLS_KWARGS'] = {'years': years, 'hsds': hsds}

if res_cls == 'Resource':
ctx.obj['CLS'] = MultiYearResourceX
Expand Down
Loading

0 comments on commit ca598da

Please sign in to comment.