# NREL - NSRDB Multi-year Means HSDS Example

This notebook demonstrates data export from the National Renewable Energy Laboratory (NREL) Wind Integration National Dataset (WIND) Toolkit and National Solar Radiation Database (NSRDB) data. The data is provided from Amazon Web Services using the HDF Group's Highly Scalable Data Service (HSDS).

Please consult the README file for setup instructions prior to running this notebook.


In [1]:
%matplotlib inline
import h5pyd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree
import time

# Plotting settings
mpl.rcParams['font.sans-serif'] = 'DejaVu Sans'
mpl.rcParams['pdf.fonttype'] = 42
mpl.rc('xtick', labelsize=16) 
mpl.rc('ytick', labelsize=16)
mpl.rc('font', size=16)

## NSRDB on AWS (via HSDS)

In [2]:
file_path = '/nrel/nsrdb/v3/nsrdb_2017.h5'
with h5pyd.File(file_path, mode='r') as f:
    meta = pd.DataFrame(f['meta'][...])
    
meta.head()

Unnamed: 0,latitude,longitude,elevation,timezone,country,state,county,urban,population,landcover
0,-19.99,-175.259995,0.0,13,b'None',b'None',b'None',b'None',-9999,210
1,-19.99,-175.220001,0.0,13,b'None',b'None',b'None',b'None',-9999,210
2,-19.99,-175.179993,0.0,13,b'None',b'None',b'None',b'None',-9999,210
3,-19.99,-175.139999,0.0,13,b'None',b'None',b'None',b'None',-9999,210
4,-19.99,-175.100006,0.0,13,b'None',b'None',b'None',b'None',-9999,210


## Compute Multi-year GHI means for California

In [3]:
ca_meta = meta.loc[meta['state'] == b'California']
ca_pos = ca_meta.index.values.copy()
ca_slice = slice(ca_pos[0], ca_pos[-1] + 1)
ca_pos -= ca_pos[0]
down_size = 17520 * len(ca_pos) * 2 * 10**-6
ca_meta.head()
print('Number of NSRDB pixels in CA = {}'.format(len(ca_meta)))
print('Download size per year = {:.4f} MB'.format(down_size))

Number of NSRDB pixels in CA = 26010
Download size per year = 911.3904 MB


In [6]:
ca_df = ca_meta[['latitude', 'longitude']].copy()
ext_time = {}
ts = time.time()
for year in range(1998, 2018):
    col = '{}_means'.format(year)
    ty = time.time()
    file_path = '/nrel/nsrdb/v3/nsrdb_{}.h5'.format(year)
    with h5pyd.File(file_path, mode='r') as f:
        ghi = f['ghi'][:, ca_slice]
    
    ca_df[col] = np.mean(ghi[:, ca_pos], axis=0)
    tt = time.time() - ty
    ext_time[str(year)] = tt
    print('Means for {} computed in {:.4f} minutes'.format(year, tt / 60))
    
tt = time.time() - ts
ext_time['total'] = tt
print('Time to extract all means =  {:.4f} hours'.format(tt / 3600))



KeyboardInterrupt: 

In [None]:
ext_df = pd.DataFrame(ext_time, index=['seconds']).T
ext_df['minutes'] = ext_df['seconds'] / 60
ext_df['hours'] = ext_df['seconds'] / 3600
ax = ext_df.iloc[:-1]['minutes'].plot.bar()
ax.set_xlabel('Year')
ax.set_ylabel('Compute Time (min)')
plt.show()

In [None]:
means_cols = [col for col in ca_df.columns
              if 'means' in col]
ca_df['MY means'] = ca_df[means_cols].mean(axis=1)
ca_df['CV'] = ca_df[means_cols].std(axis=1) / ca_df['MY means']
ca_df.head()

In [None]:
ax = ca_df.plot.scatter(x='longitude', y='latitude', c='MY means',
                colormap='YlOrRd',
                title='Multi-year GHI Means')
plt.show()

In [None]:
ca_df.plot.scatter(x='longitude', y='latitude', c='CV',
                colormap='BuPu',
                title='CV of annual GHI means')
plt.show()