In [5]:
import h5pyd
#import json

class InvalidUsage(Exception):
    """Error raised when a request cannot be served as asked.

    Attributes:
        status_code: Numeric status code. Defaults to 400 at class level and
            can be overridden per instance through the constructor.
        message: Human-readable description of the problem.
        payload: Optional mapping (or iterable of key/value pairs) whose
            entries are merged into the result of ``to_dict``.
    """

    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        """Store the message, an optional status-code override and a payload.

        Args:
            message: Description of the problem.
            status_code: Replaces the class-level default when not ``None``.
            payload: Extra key/value data to expose via ``to_dict``.
        """
        # Forward the message to Exception so that str(exc), exc.args and
        # tracebacks show it instead of an empty string.
        super().__init__(message)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return a new dict with the payload entries plus a ``message`` key."""
        rv = dict(self.payload or ())
        rv['message'] = self.message
        return rv

def available_datasets(f):
    """List, in sorted order, every name obtained by iterating resource f.

    A ValueError raised while collecting or sorting the names is re-raised
    as InvalidUsage.
    """
    try:
        return sorted(f)
    except ValueError:
        raise InvalidUsage("Problem with processing WTK datasets.")

# with open('config.json', 'r') as f:
#     config = json.load(f)

# HSDS endpoint used to open the resource; the commented-out line below is an
# alternative endpoint kept for reference.
endpoint = "https://tap-hsds.ace.nrel.gov"
#endpoint = "https://developer.nrel.gov/api/hsds"

# Domain (resource path) passed to h5pyd.File.
domain = "/nrel/wtk-us.h5"
# Credentials are left unset and are NOT passed to h5pyd.File below
# (see the debugging note inside the try block).
username = None
password = None
api_key = None

# Open the resource read-only. An OSError raised while opening is re-raised
# as InvalidUsage with status code 403.
try:
    # Previous call that passed credentials, kept for reference:
    # hsds_f = h5pyd.File(domain=domain,
    #                endpoint=endpoint,
    #                username=username,
    #                password=password,
    #                api_key=api_key,
    #                mode='r')

    # Debugging showed that new HSDS instance doesn't need user & pass & key
    # (in fact, they lead to errors if provided)
    hsds_f = h5pyd.File(domain=domain,
                   endpoint=endpoint,
                   mode='r')
except OSError:
    raise InvalidUsage("Failed to access HSDS resource", status_code=403)

# Smoke test 1: show the sorted names of everything available in the file.
print("\nPrinting list of availble datasets:")
print(available_datasets(hsds_f))

# Smoke test 2: read three consecutive entries along the first axis at fixed
# position (42, 42) on the other two axes, and compare by eye with the
# expected first value quoted in the message. `dset` is reused by the timing
# cell further down.
print("\nPrinting small sample of windspeed data (3 values, first one should be: 10.68148):")
dset = hsds_f["windspeed_60m"]
print(dset[420:420+3, 42, 42])


Printing list of availble datasets:
['DIF', 'DNI', 'GHI', 'coordinates', 'datetime', 'inversemoninobukhovlength_2m', 'precipitationrate_0m', 'pressure_0m', 'pressure_100m', 'pressure_200m', 'relativehumidity_2m', 'status', 'temperature_100m', 'temperature_10m', 'temperature_120m', 'temperature_140m', 'temperature_160m', 'temperature_200m', 'temperature_2m', 'temperature_40m', 'temperature_60m', 'temperature_80m', 'winddirection_100m', 'winddirection_10m', 'winddirection_120m', 'winddirection_140m', 'winddirection_160m', 'winddirection_200m', 'winddirection_40m', 'winddirection_60m', 'winddirection_80m', 'windspeed_100m', 'windspeed_10m', 'windspeed_120m', 'windspeed_140m', 'windspeed_160m', 'windspeed_200m', 'windspeed_40m', 'windspeed_60m', 'windspeed_80m']

Printing small sample of windspeed data (3 values, first one should be: 10.68148):
[10.68148  10.650963 10.821861]


In [6]:
import pandas as pd
import dateutil

def time_indices_all(f):
    """Return a dataframe with all processed timestamps from f.

    Meant to run once, when the app is first loaded, to reduce overhead.

    Parameters
    ----------
    f : mapping-like
        Resource exposing a "datetime" entry whose full contents can be read
        with ``[:]`` and whose elements are parseable date/time strings.

    Returns
    -------
    pandas.DataFrame
        One column named "datetime" holding the parsed timestamps, indexed by
        row position (0 .. n-1).
    """
    # Import the submodule explicitly: a bare ``import dateutil`` does not
    # guarantee that ``dateutil.parser`` has been loaded; it only works when
    # some other package happens to have imported it already.
    from dateutil import parser as date_parser

    # Read every timestamp once, then parse each raw value.
    raw = f["datetime"][:]
    frame = pd.DataFrame({"datetime": raw}, index=range(len(raw)))
    frame["datetime"] = frame["datetime"].apply(date_parser.parse)
    return frame

# Build the timestamp table once; later cells filter it to turn a date range
# into row positions.
time_df = time_indices_all(hsds_f)

print(time_df)

                 datetime
0     2007-01-01 00:00:00
1     2007-01-01 01:00:00
2     2007-01-01 02:00:00
3     2007-01-01 03:00:00
4     2007-01-01 04:00:00
...                   ...
61363 2013-12-31 19:00:00
61364 2013-12-31 20:00:00
61365 2013-12-31 21:00:00
61366 2013-12-31 22:00:00
61367 2013-12-31 23:00:00

[61368 rows x 1 columns]


In [8]:
# 4-year test: find the row positions whose timestamp falls within
# 2010-01-01 .. 2014-01-01 (both bounds inclusive) and show the first and last.

start_t = pd.Timestamp(year=2010, month=1, day=1)
end_t = pd.Timestamp(year=2014, month=1, day=1)
index_list = time_df[time_df["datetime"].between(start_t, end_t)].index
print(index_list.min(), index_list.max())

26304 61367


In [9]:
%%timeit

for i in range(2):
    print(dset[index_list.min():index_list.max(), 42+i:42+i+1, 42+i:42+i+1])

[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024 10.788292 10.827965 ...  8.667259  8.731346  8.203377]
[10.09552  12.027344 11.42308  ...  8.435318  8.856468  8.407852]
[10.596024

In [11]:
import rex

In [None]:
# Reference: rex MultiYearResource API documentation — https://nrel.github.io/rex/rex/rex.multi_year_resource.html#rex.multi_year_resource.MultiYearResource

In [18]:
from rex.multi_year_resource import * 

In [37]:
# Open every file matching the wildcard as a single multi-year resource and
# print its combined time index.
# NOTE(review): hsds=True presumably makes rex read through HSDS rather than
# from local files — confirm against the rex documentation.
with MultiYearWindResource('/nrel/wtk/conus/wtk_conus_*.h5', hsds=True) as res:
    ti = res.time_index
    print(ti)

DatetimeIndex(['2007-01-01 00:00:00+00:00', '2007-01-01 01:00:00+00:00',
               '2007-01-01 02:00:00+00:00', '2007-01-01 03:00:00+00:00',
               '2007-01-01 04:00:00+00:00', '2007-01-01 05:00:00+00:00',
               '2007-01-01 06:00:00+00:00', '2007-01-01 07:00:00+00:00',
               '2007-01-01 08:00:00+00:00', '2007-01-01 09:00:00+00:00',
               ...
               '2014-12-31 14:00:00+00:00', '2014-12-31 15:00:00+00:00',
               '2014-12-31 16:00:00+00:00', '2014-12-31 17:00:00+00:00',
               '2014-12-31 18:00:00+00:00', '2014-12-31 19:00:00+00:00',
               '2014-12-31 20:00:00+00:00', '2014-12-31 21:00:00+00:00',
               '2014-12-31 22:00:00+00:00', '2014-12-31 23:00:00+00:00'],
              dtype='datetime64[ns, UTC]', length=70128, freq=None)


In [102]:
# Keep a multi-year resource handle bound to `myr` for the exploratory cells
# that follow.
# NOTE(review): unlike the `with` usage, nothing in the visible cells closes
# this handle — consider calling myr.close() once the exploration is done.
myr = MultiYearWindResource('/nrel/wtk/conus/wtk_conus_*.h5', hsds=True)
# Alternative that restricts the resource to specific years:
#myr = MultiYearWindResource('/nrel/wtk/conus/wtk_conus_*.h5', years=[2010,2011], hsds=True)

In [103]:
# Exploration only: list the attributes and methods available on the resource object.
dir(myr)

['PREFIX',
 'SUFFIX',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_for_years',
 '_check_year_slice',
 '_get_coords',
 '_get_ds',
 '_get_meta',
 '_get_time_index',
 '_get_year_ds',
 '_h5',
 '_i',
 '_time_index',
 'close',
 'coordinates',
 'datasets',
 'dsets',
 'get_attrs',
 'get_dset_properties',
 'get_meta_arr',
 'get_scale',
 'get_units',
 'global_attrs',
 'h5',
 'h5_dir',
 'h5_file',
 'h5_files',
 'lat_lon',
 'meta',
 'shape',
 'time_index']

In [104]:
# Exploration only: show the built-in documentation of the resource class.
help(myr)

Help on MultiYearWindResource in module rex.multi_year_resource object:

class MultiYearWindResource(MultiYearResource)
 |  MultiYearWindResource(h5_path, years=None, unscale=True, str_decode=True, hsds=False)
 |  
 |  Class to handle multiple years of wind resource data stored accross
 |  multiple .h5 files
 |  
 |  Method resolution order:
 |      MultiYearWindResource
 |      MultiYearResource
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, h5_path, years=None, unscale=True, str_decode=True, hsds=False)
 |      Parameters
 |      ----------
 |      h5_path : str
 |          Path to directory containing multi-file resource file sets.
 |          Available formats:
 |              /h5_dir/
 |              /h5_dir/prefix*suffix
 |      years : list, optional
 |          List of years to access, by default None
 |      unscale : bool
 |          Boolean flag to automatically unscale variables on extraction
 |      str_decode : bool
 |          Boolean flag

In [105]:
# Peek at the first five coordinate rows.
# NOTE(review): the two columns look like (latitude, longitude) — confirm.
myr.coordinates[0:5]

array([[  37.603382, -127.61705 ],
       [  37.62042 , -127.62601 ],
       [  37.63745 , -127.63498 ],
       [  37.654484, -127.64395 ],
       [  37.67151 , -127.65292 ]], dtype=float32)

In [107]:
# Tabulate the multi-year time index so date bounds can be mapped to row positions.
dt = myr.time_index
dt = pd.DataFrame({"datetime": dt[:]}, index=range(len(dt)))

# The time index is timezone-aware (UTC), so the bounds are localized to UTC as well.
start_t = pd.Timestamp("2010-01-01").tz_localize("UTC")
end_t = pd.Timestamp("2013-01-01").tz_localize("UTC")

# Select the rows inside the window once (both bounds inclusive), then show
# them and keep their row positions for the timing cell below.
dt_window = dt[dt["datetime"].between(start_t, end_t)]
display(dt_window)
index_list = dt_window.index
print(index_list.min(), index_list.max())

Unnamed: 0,datetime
26304,2010-01-01 00:00:00+00:00
26305,2010-01-01 01:00:00+00:00
26306,2010-01-01 02:00:00+00:00
26307,2010-01-01 03:00:00+00:00
26308,2010-01-01 04:00:00+00:00
...,...
52604,2012-12-31 20:00:00+00:00
52605,2012-12-31 21:00:00+00:00
52606,2012-12-31 22:00:00+00:00
52607,2012-12-31 23:00:00+00:00


26304 52608


In [118]:
%%timeit
print(myr['windspeed_60m', index_list.min():index_list.max()+1, 3])

[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
[14.16 14.14 13.58 ...  2.78  4.19  6.02]
1.32 s ± 114 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
