Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make_SAM_files and SolarResource.get_SAM_df updates #124

Merged
merged 6 commits into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions rex/renewable_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,21 @@ class SolarResource(BaseResource):
--------
resource.BaseResource : Parent class
"""
def get_SAM_df(self, site):
def get_SAM_df(self, site, extra_cols=None):
"""
Get SAM solar resource DataFrame for given site

Parameters
----------
site : int
Site to extract SAM DataFrame for
Site to extract SAM DataFrame for.
extra_cols : dict, optional
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I like this. Always a good idea to add kwargs that can be endlessly expanded (instead of something like surface_albedo=True just to add the single new col).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally agreed!

A dictionary where the keys are extra columns
to extract from the SAM solar resource DataFrame
and the values are the names the new columns should
have (e.g. extra_cols={'surface_albedo': 'Surface
Albedo'} will extract the 'surface_albedo' from the
resource file and call it 'Surface Albedo' in the output).

Returns
-------
Expand All @@ -44,13 +51,16 @@ def get_SAM_df(self, site):
'Month': self.time_index.month,
'Day': self.time_index.day,
'Hour': self.time_index.hour})
if len(self) > 8784:

if len(self) > 8784 or (self.time_index.minute != 0).any():
res_df['Minute'] = self.time_index.minute

time_zone = self.meta.loc[site, 'timezone']
time_interval = len(self.time_index) // 8760

for var in ['dni', 'dhi', 'wind_speed', 'air_temperature']:
main_cols = ['dni', 'dhi', 'wind_speed', 'air_temperature']
extra_cols = extra_cols or {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting! I've never seen this. So it will fall back to {} whenever extra_cols evaluates to False (e.g. None or an empty dict)?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup! I love using this pattern when I use an optional input with a default value of None in the function declaration. I feel it is an elegant way to get around the fact that you should never use mutables as default arguments.

for var in main_cols + list(extra_cols):
ds_slice = (slice(None), site)
var_array = self._get_ds(var, ds_slice)
var_array = SAMResource.roll_timeseries(var_array, time_zone,
Expand All @@ -60,6 +70,7 @@ def get_SAM_df(self, site):

col_map = {'dni': 'DNI', 'dhi': 'DHI', 'wind_speed': 'Wind Speed',
'air_temperature': 'Temperature'}
col_map.update(extra_cols)
res_df = res_df.rename(columns=col_map)
res_df.name = "SAM_-{}".format(site)

Expand Down
51 changes: 43 additions & 8 deletions rex/resource_extraction/resource_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ def _to_SAM_csv(sam_df, site_meta, out_path, write_time=True):
col_map[c] = 'Time Zone'
elif c.lower() == 'gid':
col_map[c] = 'Location ID'
else:
elif c.islower():
col_map[c] = c.capitalize()

site_meta = site_meta.rename(columns=col_map)
Expand Down Expand Up @@ -886,7 +886,8 @@ def get_box_df(self, ds_name, lat_lon_1, lat_lon_2):

return box_df

def get_SAM_gid(self, gid, out_path=None, write_time=True, **kwargs):
def get_SAM_gid(self, gid, out_path=None, write_time=True,
extra_meta_data=None, **kwargs):
"""
Extract time-series of all variables needed to run SAM for nearest
site to given resource gid
Expand All @@ -899,6 +900,11 @@ def get_SAM_gid(self, gid, out_path=None, write_time=True, **kwargs):
Path to save SAM data to in SAM .csv format, by default None
write_time : bool
Flag to write the time columns (Year, Month, Day, Hour, Minute)
extra_meta_data : dict, optional
Dictionary that maps the names and values of extra meta
info. For example, extra_meta_data={'TMY Year': '2020'}
will add a column 'TMY Year' to the meta data with
a value of '2020'.
kwargs : dict
Internal kwargs for get_SAM_df

Expand Down Expand Up @@ -926,6 +932,11 @@ def get_SAM_gid(self, gid, out_path=None, write_time=True, **kwargs):
i_out_path = i_out_path.replace('.csv', tag)

site_meta = self['meta', res_id]

extra_meta_data = extra_meta_data or {}
for col_name, val in extra_meta_data.items():
site_meta[col_name] = val

if self.data_version is not None:
# pylint: disable=unsupported-assignment-operation
site_meta['Version'] = self.data_version
Expand Down Expand Up @@ -1332,7 +1343,8 @@ def get_raster_index(self, target, shape, meta=None, max_delta=50):

@classmethod
def make_SAM_files(cls, res_h5, gids, out_path, write_time=True,
max_workers=1, n_chunks=36, **kwargs):
extra_meta_data=None, max_workers=1, n_chunks=36,
**kwargs):
"""A performant parallel entry point for making many SAM csv
files for many gids

Expand All @@ -1347,6 +1359,11 @@ def make_SAM_files(cls, res_h5, gids, out_path, write_time=True,
"*_{gid}.csv" will be appended to the file path
write_time : bool
Flag to write the time columns (Year, Month, Day, Hour, Minute)
extra_meta_data : dict, optional
Dictionary that maps the names and values of extra meta
info. For example, extra_meta_data={'TMY Year': '2020'}
will add a column 'TMY Year' to the meta data with
a value of '2020'.
max_workers : int | None
Number of parallel workers. None for all workers.
n_chunks : int
Expand All @@ -1358,15 +1375,19 @@ def make_SAM_files(cls, res_h5, gids, out_path, write_time=True,
if max_workers == 1:
with cls(res_h5) as res:
res.get_SAM_gid(gids, out_path=out_path,
write_time=write_time, **kwargs)
write_time=write_time,
extra_meta_data=extra_meta_data,
**kwargs)
else:
msg = 'Bad gids dtype: {}'.format(type(gids))
assert isinstance(gids, (list, tuple, np.ndarray)), msg
gid_chunks = np.array_split(np.array(gids), n_chunks)
with SpawnProcessPool(max_workers=max_workers) as spp:
for chunk in gid_chunks:
spp.submit(cls.make_SAM_files, res_h5, chunk, out_path,
write_time=write_time, max_workers=1, **kwargs)
write_time=write_time,
extra_meta_data=extra_meta_data,
max_workers=1, **kwargs)

def close(self):
"""
Expand Down Expand Up @@ -1690,7 +1711,7 @@ class WindX(ResourceX):
DEFAULT_RES_CLS = WindResource

def get_SAM_gid(self, hub_height, gid, out_path=None, write_time=True,
**kwargs):
extra_meta_data=None, **kwargs):
"""
Extract time-series of all variables needed to run SAM for nearest
site to given resource gid and hub height
Expand All @@ -1705,6 +1726,11 @@ def get_SAM_gid(self, hub_height, gid, out_path=None, write_time=True,
Path to save SAM data to in SAM .csv format, by default None
write_time : bool
Flag to write the time columns (Year, Month, Day, Hour, Minute)
extra_meta_data : dict, optional
Dictionary that maps the names and values of extra meta
info. For example, extra_meta_data={'TMY Year': '2020'}
will add a column 'TMY Year' to the meta data with
a value of '2020'.
kwargs : dict
Internal kwargs for get_SAM_df:
- require_wind_dir
Expand All @@ -1723,7 +1749,9 @@ def get_SAM_gid(self, hub_height, gid, out_path=None, write_time=True,
kwargs.update({'add_header': True})

SAM_df = super().get_SAM_gid(gid, out_path=out_path,
write_time=write_time, **kwargs)
write_time=write_time,
extra_meta_data=extra_meta_data,
**kwargs)

return SAM_df

Expand Down Expand Up @@ -1766,7 +1794,8 @@ def get_SAM_lat_lon(self, hub_height, lat_lon, check_lat_lon=True,

@classmethod
def make_SAM_files(cls, hub_height, res_h5, gids, out_path,
write_time=True, max_workers=1, n_chunks=36, **kwargs):
write_time=True, extra_meta_data=None, max_workers=1,
n_chunks=36, **kwargs):
"""A performant parallel entry point for making many SAM csv
files for many gids

Expand All @@ -1783,6 +1812,11 @@ def make_SAM_files(cls, hub_height, res_h5, gids, out_path,
"*_{gid}.csv" will be appended to the file path
write_time : bool
Flag to write the time columns (Year, Month, Day, Hour, Minute)
extra_meta_data : dict, optional
Dictionary that maps the names and values of extra meta
info. For example, extra_meta_data={'TMY Year': '2020'}
will add a column 'TMY Year' to the meta data with
a value of '2020'.
max_workers : int | None
Number of parallel workers. None for all workers.
n_chunks : int
Expand All @@ -1792,6 +1826,7 @@ def make_SAM_files(cls, hub_height, res_h5, gids, out_path,
"""
kwargs['height'] = hub_height
super().get_SAM_gid(res_h5, gids, out_path, write_time=write_time,
extra_meta_data=extra_meta_data,
max_workers=max_workers, n_chunks=n_chunks,
**kwargs)

Expand Down