### Notebook with examples of HSDS queries

In [1]:
# Code below uses the MultiYearWindX class from the rex package.
# rex documentation for the related MultiYearResource class:
# https://nrel.github.io/rex/rex/rex.multi_year_resource.html#rex.multi_year_resource.MultiYearResource

from rex.resource_extraction import MultiYearWindX
import pandas as pd

# MultiYearWindX is used here instead of MultiYearWindResource because it
# gives access to a cKDTree object (myr.tree), which the cells below use to
# find the grid points that are the closest to the location of interest.
# NOTE(review): the original comment called this "a bit different than the
# code above", but no code precedes this cell in the notebook -- the
# MultiYearWindResource variant presumably lives elsewhere; confirm.

# NOTE(review): the wildcard presumably matches one .h5 file per year, and
# hsds=True presumably routes the reads through HSDS -- confirm in the rex docs.
myr = MultiYearWindX('/nrel/wtk/conus/wtk_conus_*.h5', hsds=True)

In [2]:
# Time index of the whole multi-year dataset (a UTC DatetimeIndex starting at
# 2007-01-01, per the output shown further down). The next cell turns it into
# a lookup table for the time window of interest.
dt = myr.time_index

In [3]:
# Subsetting based on the time interval of interest

# Read the time index afresh instead of reusing the current value of `dt`:
# this cell rebinds `dt` to a DataFrame, so building the table from `dt`
# itself would break as soon as the cell is executed a second time.
full_time_index = myr.time_index
dt = pd.DataFrame(
    {"datetime": full_time_index[:]},
    index=range(0, full_time_index.shape[0]),
)

# Timezone-aware bounds, so they can be compared with the UTC time index
start_t = pd.Timestamp('2013-01-01', tz='UTC')
end_t = pd.Timestamp('2014-01-01', tz='UTC')

# Both bounds are inclusive, so end_idx is the position of end_t itself.
# The cells below slice with start_idx:end_idx, which excludes end_idx, so
# the extracted data covers exactly the interval [start_t, end_t).
index_list = dt[(dt.datetime >= start_t) & (dt.datetime <= end_t)].index
if index_list.empty:
    # Without this check min()/max() would quietly return NaN and the failure
    # would only surface later, inside the data request.
    raise ValueError(f"No timestamps found between {start_t} and {end_t}")
start_idx = index_list.min()
end_idx = index_list.max()

print("Selected time interval corresponds to time indicies in the range between:")
print(start_idx, end_idx)

# This is how time index can be accessed
myr.time_index[0:5]

Selected time interval corresponds to time indicies in the range between:
52608 61368


DatetimeIndex(['2007-01-01 00:00:00+00:00', '2007-01-01 01:00:00+00:00',
               '2007-01-01 02:00:00+00:00', '2007-01-01 03:00:00+00:00',
               '2007-01-01 04:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [4]:
# Find indices of N=4 grid points that are the closest to the site (dd will be distances from the site)
# NOTE(review): the tree is queried with a (lat, lon) pair, so dd is presumably
# a plain Euclidean distance in degrees rather than a ground distance -- confirm.
latlon = (40.7484, -73.9857)

dd,ii = myr.tree.query(latlon, 4)
print("Distances and indices:\n", dd,ii)

# ii holds positions into myr.coordinates, so indexing with it gives the
# lat/lon of each of the neighbours found above
print("Lat/lon pairs for identified closest grid points:\n", myr.coordinates[ii])

# This is essentially Nearest-Neighbor interpolation -- extracting wind speed values for the grid point
# that is the closest to the specified location (ii[0]: the query results are ordered by distance,
# as the printed distances show). The time slice start_idx:end_idx comes from the cell above.
values = myr['windspeed_60m', start_idx:end_idx, ii[0]]

print(values)
print("Stats for extracted values:")
print(pd.Series(values).describe())

# Performance here should be as good as with MultiYearWindResource (e.g., ~2s for 7 years)

Distances and indices:
 [0.00688171 0.0175041  0.0196696  0.0208428 ] [2301508 2300566 2301509 2301507]
Lat/lon pairs for identified closest grid points:
 [[ 40.74543  -73.97949 ]
 [ 40.75046  -74.00308 ]
 [ 40.763283 -73.97284 ]
 [ 40.727562 -73.986145]]
[8.1  8.77 9.18 ... 6.63 7.58 6.85]
Stats for extracted values:
count    8760.000000
mean        5.208496
std         2.570455
min         0.060000
25%         3.290000
50%         4.950000
75%         6.840000
max        15.720000
dtype: float64


In [5]:
# Repeats the N=4 nearest-grid-point query from the previous cell for the same
# site, without the data extraction: dd are the distances from the site and ii
# the positions of the grid points in myr.coordinates.
latlon = (40.7484, -73.9857)

dd,ii = myr.tree.query(latlon, 4)
print("Distances and indices:\n", dd,ii)

print("Lat/lon pairs for identified closest grid points:\n", myr.coordinates[ii])

Distances and indices:
 [0.00688171 0.0175041  0.0196696  0.0208428 ] [2301508 2300566 2301509 2301507]
Lat/lon pairs for identified closest grid points:
 [[ 40.74543  -73.97949 ]
 [ 40.75046  -74.00308 ]
 [ 40.763283 -73.97284 ]
 [ 40.727562 -73.986145]]


In [6]:
def find_tile_optimized(myr, lat, lon, gridpoint_count=1):
    """Return a dataframe describing the grid points of `myr` nearest to (lat, lon).

    This is the NEW version; the older implementation should eventually be
    DEPRECATED.

    Parameters
    ----------
    myr : object
        Resource handle that provides `tree.query(point, k)` and an indexable
        `coordinates` collection of (lat, lon) pairs, e.g. a MultiYearWindX
        instance.
    lat, lon : float
        Location of interest.
    gridpoint_count : int, optional
        Number of nearest grid points to look up (default: 1).

    Returns
    -------
    pandas.DataFrame
        One row per grid point, labelled 0..n-1, with columns:
        "lat", "lon" -- coordinates of the grid point (float);
        "index"      -- position of the grid point in `myr.coordinates`
                        (downcast to the smallest integer dtype that fits);
        "d"          -- distance from (lat, lon) as reported by `tree.query`.
        Rows keep the order returned by `tree.query`, i.e. sorted by distance.
    """
    dd, ii = myr.tree.query((lat, lon), gridpoint_count)
    if gridpoint_count == 1:
        # For a single neighbour the query hands back bare scalars; wrap them
        # so the rest of the function can treat every case as a sequence.
        dd = [dd]
        ii = [ii]

    gridpoints_latlon = myr.coordinates[ii]

    # Build every column in one pass. Appending rows one at a time to an
    # empty frame copies the frame on each insert and leaves the column
    # dtypes to pandas' enlargement inference; plain Python floats/ints give
    # numeric columns deterministically.
    res = pd.DataFrame(
        {
            "lat": [float(point[0]) for point in gridpoints_latlon],
            "lon": [float(point[1]) for point in gridpoints_latlon],
            "index": [int(idx) for idx in ii],
            "d": [float(dist) for dist in dd],
        },
        columns=["lat", "lon", "index", "d"],
    )

    res["index"] = pd.to_numeric(res["index"], downcast='integer')

    # The returned dataframe will be sorted by d because tree.query() returns points sorted by distance
    return res

# Example call: the single closest grid point for the same site as in the cells above
display(find_tile_optimized(myr, 40.7484, -73.9857, 1))

Unnamed: 0,lat,lon,index,d
0,40.74543,-73.979492,2301508,0.006882


In [7]:
# Alphabetical listing of the names obtained by iterating over myr
sorted(myr)

['coordinates',
 'inversemoninobukhovlength_2m',
 'meta',
 'precipitationrate_0m',
 'pressure_0m',
 'pressure_100m',
 'pressure_200m',
 'relativehumidity_2m',
 'temperature_100m',
 'temperature_10m',
 'temperature_120m',
 'temperature_140m',
 'temperature_160m',
 'temperature_200m',
 'temperature_2m',
 'temperature_40m',
 'temperature_60m',
 'temperature_80m',
 'time_index',
 'winddirection_100m',
 'winddirection_10m',
 'winddirection_120m',
 'winddirection_140m',
 'winddirection_160m',
 'winddirection_200m',
 'winddirection_40m',
 'winddirection_60m',
 'winddirection_80m',
 'windspeed_100m',
 'windspeed_10m',
 'windspeed_120m',
 'windspeed_140m',
 'windspeed_160m',
 'windspeed_200m',
 'windspeed_40m',
 'windspeed_60m',
 'windspeed_80m']

In [8]:
# Same request shape as the wind speed extraction above (same time slice, same
# closest grid point ii[0]), but for the 'inversemoninobukhovlength_2m' dataset
myr['inversemoninobukhovlength_2m', start_idx:end_idx, ii[0]]

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [11]:
# Summary statistics for the same dataset and time slice at a hard-coded grid point.
# NOTE(review): 2300564 is not one of the four neighbour indices printed
# earlier in the notebook -- confirm this is the intended grid point.
# NOTE(review): the saved output of this cell is
# "OSError: [Errno 429] Too Many Requests", so the request was rejected
# (presumably service-side rate limiting) and no statistics were produced.
pd.Series(myr['inversemoninobukhovlength_2m', start_idx:end_idx, 2300564]).describe()

OSError: [Errno 429] Too Many Requests