# Predicting Power Using the NSRDB<a id='Predicting_Power_Using_the_NSRDB'></a>

In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import __version__ as sklearn_version
from scipy.spatial import cKDTree
import h5pyd
import requests
import json
from requests.structures import CaseInsensitiveDict



In [2]:
# Open the 2020 NSRDB file read-only through h5pyd.
nsrdb = h5pyd.File("/nrel/nsrdb/v3/nsrdb_2020.h5", 'r')

# Training data used to refit the model later in the notebook.
x_train = pd.read_csv('../data/X_train2.csv')
y_train = pd.read_csv('../data/Y_train2.csv')

# Per-state electricity prices and the NSRDB dataset attribute table.
Electricity_cost = pd.read_csv('../data/StatesElectricity.csv')
Attributes = pd.read_csv('../data/Attributes.csv')

# Geoapify forward-geocoding requests, one per location of interest.
# NOTE(review): the API key is hardcoded in these URLs and is therefore
# published with the notebook; consider rotating it and loading it from the
# environment instead.
# Location 1: White House, Washington, DC.
url1 = "https://api.geoapify.com/v1/geocode/search?text=1600%20Pennsylvania%20Avenue%20NW%2C%20Washington%2C%20DC%2020500%20United%20States&apiKey=57e88c5179064b9db5ccd0973355973a"
# Location 2: Space Needle, Seattle. The comma after "Seattle" must be encoded
# as %2C; the previous "&2C" ended the `text` parameter at "Seattle", so the
# state and ZIP code were never sent to the geocoder.
# NOTE(review): re-check which entry of the response is the intended match
# after this correction.
url2 = "https://api.geoapify.com/v1/geocode/search?text=400%20Broad%20Street%2C%20Seattle%2C%20Washington%2098109%20United%20States&apiKey=57e88c5179064b9db5ccd0973355973a"
# Location 3: Los Ranchos De Albuquerque, New Mexico (street name spelled "Rio Grande").
url3 = "https://api.geoapify.com/v1/geocode/search?text=4803%20Rio%20Grande%20Boulevard%20NW%2C%20Los%20Ranchos%20De%20Albuquerque%2C%20New%20Mexico%2087107%20United%20States&apiKey=57e88c5179064b9db5ccd0973355973a"
# Location 4: Cedar Rapids, Iowa (city name spelled "Cedar").
url4 = "https://api.geoapify.com/v1/geocode/search?text=350%20First%20Avenue%20NE%2C%20Cedar%20Rapids%2C%20Iowa%2052401%20United%20States&apiKey=57e88c5179064b9db5ccd0973355973a"

In [3]:
# Remove the index column that was written into the CSVs when they were saved.
# Reassigning with errors='ignore' keeps this cell safe to re-run: the original
# in-place drop raised KeyError on a second execution.
y_train = y_train.drop(columns = ['Unnamed: 0'], errors = 'ignore')
x_train = x_train.drop(columns = ['Unnamed: 0'], errors = 'ignore')

Powered by <a href="https://www.geoapify.com/">Geoapify</a>

In [4]:
# Ask Geoapify for JSON responses.
headers = CaseInsensitiveDict()
headers['Accept'] = "application/json"

# requests has no default timeout, so an unresponsive server would block the
# notebook indefinitely. Fail after this many seconds instead.
GEOCODE_TIMEOUT_SECONDS = 30

resp1 = requests.get(url1, headers = headers, timeout = GEOCODE_TIMEOUT_SECONDS)
resp2 = requests.get(url2, headers = headers, timeout = GEOCODE_TIMEOUT_SECONDS)
resp3 = requests.get(url3, headers = headers, timeout = GEOCODE_TIMEOUT_SECONDS)
resp4 = requests.get(url4, headers = headers, timeout = GEOCODE_TIMEOUT_SECONDS)

# Show the HTTP status of each lookup so failures are visible in the output.
print(resp1.status_code)
print(resp2.status_code)
print(resp3.status_code)
print(resp4.status_code)

200
200
200
200


In [5]:
# Location 1: read latitude, longitude and state from the geocoder response.
# The second returned feature (index 1) is the one used for this address.
features1 = resp1.json()['features']
properties1 = features1[1]['properties']
lat1, lon1, state1 = (properties1[key] for key in ('lat', 'lon', 'state'))

In [6]:
# Location 2: read latitude, longitude and state from the geocoder response.
# The second returned feature (index 1) is the one used for this address.
features2 = resp2.json()['features']
properties2 = features2[1]['properties']
lat2, lon2, state2 = (properties2[key] for key in ('lat', 'lon', 'state'))

In [7]:
# Location 3: read latitude, longitude and state from the first returned
# feature. (A stray bare `features3` expression was removed: it sat in the
# middle of the cell, so it displayed nothing and had no effect.)
features3 = resp3.json()['features']
properties3 = features3[0]['properties']
lat3 = properties3['lat']
lon3 = properties3['lon']
state3 = properties3['state']

In [8]:
# Location 4: read latitude, longitude and state from the first returned feature.
features4 = resp4.json()['features']
properties4 = features4[0]['properties']
lat4, lon4, state4 = (properties4[key] for key in ('lat', 'lon', 'state'))

In [9]:
# Load the pickled power-prediction model and sanity-check its metadata.
expected_model_version = '1.0'
model_path = '../models/power_predictor.pkl'
if not os.path.exists(model_path):
    # NOTE(review): `model` stays undefined on this path, so the later cells
    # that use it will raise NameError.
    print("Expected model not found")
else:
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted source.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)
    # Both checks only print a message; a mismatch does not stop the notebook.
    if model.version != expected_model_version:
        print("Expected model version doesn't match version loaded")
    if model.sklearn_version != sklearn_version:
        print("Warning: model created under different sklearn version")

In [10]:
# Index the price table by state name and discard the saved CSV index column.
# The guard and errors='ignore' make the cell safe to re-run: the original
# in-place calls raised KeyError once 'State' had already become the index.
if 'State' in Electricity_cost.columns:
    Electricity_cost = Electricity_cost.set_index('State')
Electricity_cost = Electricity_cost.drop(columns = 'Unnamed: 0', errors = 'ignore')

In [11]:
# Preview the first five rows of the state price table.
Electricity_cost.head(n = 5)

Unnamed: 0_level_0,Electricity Prices
State,Unnamed: 1_level_1
Alabama,9.84
Alaska,19.82
Arizona,10.44
Arkansas,8.32
California,18.0


In [12]:
# Use the saved CSV index column (the attribute names) as the row index.
# The guard keeps the cell safe to re-run: once the column has become the
# index, a second in-place set_index would raise KeyError.
if 'Unnamed: 0' in Attributes.columns:
    Attributes = Attributes.set_index('Unnamed: 0')

In [13]:
# Preview the first five attribute rows (one column per NSRDB dataset).
Attributes.head(n = 5)

Unnamed: 0_level_0,air_temperature,alpha,aod,asymmetry,cld_opd_dcomp,cld_reff_dcomp,clearsky_dhi,clearsky_dni,clearsky_ghi,cloud_press_acha,...,ozone,relative_humidity,solar_zenith_angle,ssa,surface_albedo,surface_pressure,time_index,total_precipitable_water,wind_direction,wind_speed
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data_source,MERRA2,MERRA2,MERRA2,climatology,UW-GOES,UW-GOES,output,output,output,UW-GOES,...,MERRA2,derived,calculated,MERRA2,MODIS-IMS,MERRA2,,MERRA2,MERRA2,MERRA2
elevation_correction,True,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,True,,True,False,False
physical_max,70.0,2.5,1.5,1.0,80.0,80.0,800.0,1350.0,1350.0,1100.0,...,0.5,100.0,180.0,1.0,1.0,1100.0,,15.0,360.0,40.0
physical_min,-100.0,0.0,0.01,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.2,0.0,0.0,0.0,0.0,300.0,,0.0,0.0,0.0
psm_scale_factor,10.0,100.0,10000.0,100.0,100.0,100.0,1.0,1.0,1.0,1.0,...,1000.0,100.0,100.0,100.0,100.0,1.0,,10.0,1.0,10.0


In [14]:
# Cache for the KD-tree built over the NSRDB coordinates. 'source' remembers
# which `nsrdb` object the tree was built from so that re-opening the file
# (rebinding `nsrdb`) rebuilds the tree instead of reusing a stale one.
_coordinate_tree_cache = {'source': None, 'tree': None}


def _coordinateTree():
    """Return the KD-tree over nsrdb['coordinates'], building it at most once per open file."""
    if _coordinate_tree_cache['source'] is not nsrdb:
        _coordinate_tree_cache['tree'] = cKDTree(nsrdb['coordinates'])
        _coordinate_tree_cache['source'] = nsrdb
    return _coordinate_tree_cache['tree']


def getLocationData(lat, lon, cols = ['meta']):
    """Return the requested NSRDB datasets for the site nearest to (lat, lon).

    Parameters
    ----------
    lat, lon : float
        Coordinates of the point of interest, in the same order and units as
        the rows of nsrdb['coordinates'].
    cols : list of str
        Names of the datasets in `nsrdb` to read. Each one is indexed as
        nsrdb[col][:, pos], i.e. every row of the column for the chosen site.

    Returns
    -------
    pandas.DataFrame
        One column per entry of `cols`, in the given order.

    Notes
    -----
    The nearest site is found with a plain Euclidean query on the coordinate
    pairs. The tree is cached between calls because building it requires
    reading the whole coordinate dataset, which the original code repeated for
    every location.
    """
    # Only the index of the nearest site is needed; the distance is discarded.
    _, pos = _coordinateTree().query(np.array([lat, lon]))

    return pd.DataFrame({col: nsrdb[col][:, pos] for col in cols}, columns = cols)

In [None]:
# NSRDB datasets to pull for every location.
columns = ['air_temperature', 'relative_humidity', 'dew_point', 'wind_speed', 'surface_pressure', 'total_precipitable_water', 'ghi']

# One frame per geocoded location, each holding the datasets listed above.
Locationdf1, Locationdf2, Locationdf3, Locationdf4 = (
    getLocationData(site_lat, site_lon, cols = columns)
    for site_lat, site_lon in ((lat1, lon1), (lat2, lon2), (lat3, lon3), (lat4, lon4))
)

# Divide every column by the 'scale_factor' recorded for it in the attribute table.
for col in columns:
    scale = float(Attributes.loc['scale_factor', col])
    for frame in (Locationdf1, Locationdf2, Locationdf3, Locationdf4):
        frame[col] = frame[col] / scale

In [None]:
# NSRDB dataset name -> feature name used for the model inputs.
column_mapper = {'air_temperature': 'TempOut',
                'surface_pressure': 'Bar',
                'relative_humidity': 'OutHum',
                'wind_speed': 'WindSpeed',
                'dew_point': 'DewPt',
                'ghi': 'SolarRad',
                'total_precipitable_water': 'Rain'}


def _add_model_features(location_df):
    """Rename the NSRDB columns of `location_df` and add derived features, in place.

    Replaces four copy-pasted blocks that applied identical steps to each
    location frame. The former `df['Bar'] = df['Bar']` self-assignment was a
    no-op and has been dropped.

    NOTE(review): the constants 0.25, 0.5 and 11.622 are carried over
    unchanged; they presumably convert NSRDB values to the units of the
    training data - confirm and document their meaning.
    """
    location_df.rename(columns = column_mapper, inplace=True)
    location_df['Rain'] = location_df['Rain'] / 0.25
    location_df['SolarEnergy'] = location_df['SolarRad'] * 0.5 / 11.622
    location_df['Temp_Pressure_ratio'] = location_df['TempOut'] / location_df['Bar']


# NOTE: re-running this cell on already-processed frames divides 'Rain' by 0.25
# a second time; re-run the cell that builds the location frames first.
for frame in (Locationdf1, Locationdf2, Locationdf3, Locationdf4):
    _add_model_features(frame)

In [None]:
# Histogram of every column of the location 1 frame, drawn on a 3x3 grid of axes.
fig, ax = plt.subplots(nrows = 3, ncols = 3, figsize = (15, 10))
Locationdf1.hist(ax = ax, figsize = (15, 10))
# Extra spacing between the panels.
fig.subplots_adjust(hspace = 0.8, wspace = 0.5)
fig.suptitle("Feature Distributions for location 1: White House");

In [None]:
# Histogram of every column of the location 2 frame, drawn on a 3x3 grid of axes.
fig, ax = plt.subplots(nrows = 3, ncols = 3, figsize = (15, 10))
Locationdf2.hist(ax = ax, figsize = (15, 10))
# Extra spacing between the panels.
fig.subplots_adjust(hspace = 0.8, wspace = 0.5)
fig.suptitle("Feature Distributions for location 2: Space Needle, Seattle, Washington");

In [None]:
# Histogram of every column of the location 3 frame on a 3x3 grid of axes.
# Fixed: this cell previously plotted Locationdf1 under the location 3 title.
fig, ax = plt.subplots(3, 3, figsize = (15, 10))
Locationdf3.hist(figsize = (15, 10), ax = ax)
plt.subplots_adjust(hspace=0.8, wspace = 0.5)
fig.suptitle("Feature Distributions for location 3: Hotel in Albuquerque, New Mexico");

In [None]:
# Histogram of every column of the location 4 frame on a 3x3 grid of axes.
# Fixed: this cell previously plotted Locationdf1 under the location 4 title.
fig, ax = plt.subplots(3, 3, figsize = (15, 10))
Locationdf4.hist(figsize = (15, 10), ax = ax)
plt.subplots_adjust(hspace=0.8, wspace = 0.5)
fig.suptitle("Feature Distributions for location 4: Hotel in Cedar Rapids, Iowa");

In [None]:
# Refit the loaded model on the training features and targets before predicting.
# NOTE(review): y_train is passed as a DataFrame read from CSV; confirm the
# estimator accepts a 2-D target of that shape.
model.fit(x_train, y_train)

In [None]:
# Columns this notebook appends to the location frames after prediction. They
# are not model inputs, so they are excluded before calling predict; without
# this, re-running the cell (or running it after the savings cells) would feed
# 'Power' / 'Money_saved' back into the model as extra features.
derived_columns = ['Power', 'Money_saved']

Locationdf1['Power'] = model.predict(Locationdf1.drop(columns = derived_columns, errors = 'ignore'))
Locationdf2['Power'] = model.predict(Locationdf2.drop(columns = derived_columns, errors = 'ignore'))
Locationdf3['Power'] = model.predict(Locationdf3.drop(columns = derived_columns, errors = 'ignore'))
Locationdf4['Power'] = model.predict(Locationdf4.drop(columns = derived_columns, errors = 'ignore'))

In [None]:
# 2x2 grid: one histogram of predicted power per location, with a dashed red
# line marking the mean.
fig, ax = plt.subplots(2, 2, figsize = (14, 12))
plt.subplots_adjust(wspace = 0.6, hspace = 0.4)

power_panels = [
    (ax[0, 0], Locationdf1, 'Power Generated for Location 1: White House, Washington D.C.'),
    (ax[0, 1], Locationdf2, 'Power Generated for Location 2: Space Needle, Seattle, Washington'),
    (ax[1, 0], Locationdf3, 'Power Generated for Location 3: Albuquerque, New Mexico'),
    (ax[1, 1], Locationdf4, 'Power Generated for Location 4: Cedar Rapids, Iowa'),
]

for panel, frame, panel_title in power_panels:
    power = frame['Power']
    panel.hist(power, bins = 10)
    panel.set_title(panel_title)
    panel.set_xlabel('Power')
    panel.set_ylabel('Count')
    panel.axvline(x = power.mean(), c = 'r', linestyle = '--')

fig.suptitle('Power Distributions, line at mean');

In [None]:
# Look up each location's electricity price by the state name the geocoder returned.
StateElectricityCost1 = Electricity_cost.loc[state1, 'Electricity Prices']
StateElectricityCost2 = Electricity_cost.loc[state2, 'Electricity Prices']
StateElectricityCost3 = Electricity_cost.loc[state3, 'Electricity Prices']
StateElectricityCost4 = Electricity_cost.loc[state4, 'Electricity Prices']

In [None]:
# Value of the predicted power for every row: power times half the state price.
# NOTE(review): the division by 2 presumably accounts for half-hourly rows - confirm.
for frame, state_price in (
    (Locationdf1, StateElectricityCost1),
    (Locationdf2, StateElectricityCost2),
    (Locationdf3, StateElectricityCost3),
    (Locationdf4, StateElectricityCost4),
):
    frame['Money_saved'] = frame['Power'] * (state_price / 2)

In [None]:
# Total savings per location: sum over all rows, then divide by 100.
# NOTE(review): the /100 presumably converts cents to dollars - confirm the
# unit of the price table.
dollars_saved1, dollars_saved2, dollars_saved3, dollars_saved4 = (
    frame['Money_saved'].sum() / 100
    for frame in (Locationdf1, Locationdf2, Locationdf3, Locationdf4)
)

In [None]:
# Report the yearly savings per location.
# Fixed labels: location 2 is Seattle, Washington and location 3 is
# Albuquerque, New Mexico (it was previously labelled "Seattle"). Amounts are
# rounded to cents instead of printing the raw float.
print(f'The amount that you would save per year at location 1 (White House) is ${dollars_saved1:,.2f}')
print(f'The amount that you would save per year at location 2 (Seattle, Washington) is ${dollars_saved2:,.2f}')
print(f'The amount that you would save per year at location 3 (Albuquerque, New Mexico) is ${dollars_saved3:,.2f}')
print(f'The amount that you would save per year at location 4 (Cedar Rapids, Iowa) is ${dollars_saved4:,.2f}')

A 600 W installation, which this model assumes, would cost around 1800 to 3000 dollars to install. Research suggests that the average time to pay off a solar installation is six to ten years. This largely depends on the electricity cost in your area. In Washington D.C., six to ten years is very achievable, but in the other locations it will take you closer to ten to fifteen years. 