In [None]:
# default_exp solar.nsrdb

In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
#hide
from nbdev.showdoc import *

# National Solar Radiation Database (NSRDB)

> The National Solar Radiation Database (NSRDB) is a serially complete collection of meteorological and solar irradiance data sets for the United States and a growing list of international locations. The data are publicly available at no cost to the user and maintained by NREL. 

## Data Query

>Find NSRDB datasets for a location

In [None]:
#export
from nrel_dev_api._core import get_request, check_api_key, _API_KEY

class NSRDB_DataQuery:
    """Returns information on the closest NSRDB datasets for a location
    including a set of links that can be used to download the data.

    The location is taken from the first of ``wkt``, ``address`` or
    ``lat``/``lon`` that is supplied, in that order of precedence. The request
    is issued from the constructor itself.

    Parameters
    ----------
    api_key : str, optional
        NREL developer API key. When omitted, ``check_api_key()`` supplies it.
    wkt : str, optional
        Well-known text geometry, e.g. ``"POINT(91.287 23.832)"``.
    address : str, optional
        Address to query; used only when ``wkt`` is not given.
    lat, lon : float, optional
        Latitude and longitude; used only when neither ``wkt`` nor ``address``
        is given. Both must be provided (``0`` is a valid value).
    dataset_type : str, optional
        Sent to the API as the ``type`` parameter when truthy.
    show_empty : bool, default False
        Sent to the API as ``show_empty`` only when truthy.

    Attributes
    ----------
    response : dict
        The complete decoded JSON response.
    outputs
        The ``"outputs"`` entry of ``response``.

    Raises
    ------
    AttributeError
        If no usable location (``wkt``, ``address`` or both ``lat`` and
        ``lon``) is supplied.
    """

    QUERY_URL = "/api/solar/nsrdb_data_query.json"

    def __init__(self,
                 api_key=None,
                 wkt=None,
                 address=None,
                 lat=None,
                 lon=None,
                 dataset_type=None,
                 show_empty=False,
                ):

        if api_key is None:
            api_key = check_api_key()

        self._params = {"api_key": api_key}

        # Pick exactly one way of describing the location. Checking lat/lon
        # against None (rather than truthiness) keeps 0 degrees usable.
        if wkt:
            self._params.update({"wkt": wkt})
        elif address:
            self._params.update({"address": address})
        elif lat is not None and lon is not None:
            self._params.update({"lat": lat, "lon": lon})
        else:
            # Fail before hitting the network instead of sending lat=None/lon=None.
            raise AttributeError("Need to specify a location. Use lat/lon or wkt or address.")

        if dataset_type:
            self._params.update({"type": dataset_type})

        if show_empty:
            self._params.update({"show_empty": show_empty})

        r = get_request(self.QUERY_URL, self._params)

        # complete response as a dictionary
        self.response = r.json()

        self.outputs = self.response["outputs"]
        

In [None]:
#hide
# Key used by the example cells that follow.
NREL_API_KEY = "DEMO_KEY"

Let's set our NREL API key.

In [None]:
from nrel_dev_api import set_nrel_api_key

# NREL_API_KEY is assigned in the hidden cell just above this one.
# NOTE(review): presumably this registers the key so later calls can omit
# `api_key` -- confirm against nrel_dev_api.
set_nrel_api_key(NREL_API_KEY)

Get information for a single location point using latitude and longitude.

In [None]:
# No api_key is passed, so the class obtains one through check_api_key().
nsrdb_data_query = NSRDB_DataQuery(lat=21.204, lon=72.839)

# check the outputs
# (`outputs` is the "outputs" entry of the JSON response)
nsrdb_data_query.outputs

[{'apiDocs': 'https://developer.nrel.gov/docs/solar/nsrdb/msg-iodc/',
  'availableIntervals': [15, 30, 60],
  'enabled': True,
  'displayName': 'MSG IODC: PSM v3',
  'metadataLink': 'https://nsrdb.nrel.gov/current-version',
  'name': 'msg-iodc',
  'ranking': 20,
  'availableYears': [2017, 2018, 2019],
  'apiUrl': 'https://developer.nrel.gov/api/nsrdb/v2/solar/msg-iodc-download',
  'links': [{'year': 2017,
    'interval': 15,
    'link': 'https://developer.nrel.gov/api/nsrdb/v2/solar/msg-iodc-download.csv?names=2017&wkt=POINT%2872.839+21.204%29&interval=15&api_key=yourapikey&email=youremail'},
   {'year': 2018,
    'interval': 15,
    'link': 'https://developer.nrel.gov/api/nsrdb/v2/solar/msg-iodc-download.csv?names=2018&wkt=POINT%2872.839+21.204%29&interval=15&api_key=yourapikey&email=youremail'},
   {'year': 2019,
    'interval': 15,
    'link': 'https://developer.nrel.gov/api/nsrdb/v2/solar/msg-iodc-download.csv?names=2019&wkt=POINT%2872.839+21.204%29&interval=15&api_key=yourapikey&e

Get information using a well-known text (WKT) representation of the geometry for which to extract data.

In [None]:
#hide_output
# A WKT point is written longitude first: POINT(lon lat).
nsrdb_data_query = NSRDB_DataQuery(wkt="POINT(91.287 23.832)")

# check the outputs
nsrdb_data_query.outputs

[{'apiDocs': 'https://developer.nrel.gov/docs/solar/nsrdb/himawari-download/',
  'availableIntervals': [10, 30, 60],
  'enabled': True,
  'displayName': 'Himawari',
  'metadataLink': 'https://nsrdb.nrel.gov',
  'name': 'himawari',
  'ranking': 20,
  'availableYears': [2016, 2017, 2018, 2019],
  'apiUrl': 'https://developer.nrel.gov/api/nsrdb/v2/solar/himawari-download',
  'links': [{'year': 2016,
    'interval': 10,
    'link': 'https://developer.nrel.gov/api/nsrdb/v2/solar/himawari-download.csv?names=2016&wkt=POINT%2891.287+23.832%29&interval=10&api_key=yourapikey&email=youremail'},
   {'year': 2017,
    'interval': 10,
    'link': 'https://developer.nrel.gov/api/nsrdb/v2/solar/himawari-download.csv?names=2017&wkt=POINT%2891.287+23.832%29&interval=10&api_key=yourapikey&email=youremail'},
   {'year': 2018,
    'interval': 10,
    'link': 'https://developer.nrel.gov/api/nsrdb/v2/solar/himawari-download.csv?names=2018&wkt=POINT%2891.287+23.832%29&interval=10&api_key=yourapikey&email=your

## Data Downloader

> Download solar irradiance and meteorological data for a location.

In [None]:
#export
from itertools import chain


def get_nsrdb_download_links(
    year,
    interval=None,
    api_key=None,
    wkt=None,
    address=None,
    lat=None,
    lon=None,
    dataset_type=None,
    show_empty=False
):
    """Collect the NSRDB download links that a data query returns for a location.

    A `NSRDB_DataQuery` is run with the location arguments, and the links of
    every returned dataset are filtered by `year` and, optionally, `interval`.

    Parameters
    ----------
    year
        Year to look for. It must appear in some dataset's "availableYears";
        links are then kept when ``str(year)`` occurs inside the link's year,
        which also picks up 'tmy-*' style named years.
    interval : optional
        When truthy, it must appear in some dataset's "availableIntervals" and
        only links with exactly this interval are returned.
    api_key : str, optional
        When omitted, ``check_api_key()`` supplies it.
    wkt, address, lat, lon, dataset_type, show_empty
        Passed unchanged to `NSRDB_DataQuery`.

    Returns
    -------
    list
        The matching "link" values, in the order the query returned them.

    Raises
    ------
    Exception
        If the year, or the requested interval, is not offered by any dataset.
    """
    if api_key is None:
        api_key = check_api_key()

    datasets = NSRDB_DataQuery(
        api_key=api_key,
        wkt=wkt,
        address=address,
        lat=lat,
        lon=lon,
        dataset_type=dataset_type,
        show_empty=show_empty,
    ).outputs

    # Availability is judged across all returned datasets together.
    years_on_offer = [
        offered for dataset in datasets for offered in dataset["availableYears"]
    ]
    if year not in years_on_offer:
        raise Exception("No data for the specified year.")

    # The interval is only validated when the caller asked for one.
    if interval:
        intervals_on_offer = [
            offered for dataset in datasets for offered in dataset["availableIntervals"]
        ]
        if interval not in intervals_on_offer:
            raise Exception("No data for the specified time interval.")

    year_text = str(year)
    matches = []
    for dataset in datasets:
        for entry in dataset["links"]:  # each entry is a dict
            # substring test on the text form covers 'tmy-*' style named years
            if year_text not in str(entry["year"]):
                continue
            # with an interval requested, drop links for other intervals
            if interval and entry["interval"] != interval:
                continue
            matches.append(entry["link"])

    return matches

In [None]:
#hide
import os

# Replace the demo key with the one in the NREL_API_KEY environment variable.
# os.environ.get returns None when the variable is not set.
set_nrel_api_key(os.environ.get("NREL_API_KEY"))

Let's get all the available links for the year 2016.

In [None]:
# No interval is given, so links for every available interval in 2016 are returned.
links = get_nsrdb_download_links(year=2016, lat=47.6, lon=-122.3)
links

['https://developer.nrel.gov/api/nsrdb/v2/solar/psm3-download.csv?names=2016&wkt=POINT%28-122.3+47.6%29&interval=30&api_key=yourapikey&email=youremail',
 'https://developer.nrel.gov/api/nsrdb/v2/solar/psm3-download.csv?names=2016&wkt=POINT%28-122.3+47.6%29&interval=60&api_key=yourapikey&email=youremail']

In [None]:
# export
import pandas as pd
import warnings

# these are strings because the year is parsed out of the link as text
LEAP_YEARS = ["1992", "1996", "2000", "2004", "2008", "2012", "2016", "2020", "2024", "2028", "2032"]

def download_nsrdb_data(
    link,
    email,
    api_key=None,
):
    """Download NSRDB data from the provided link and returns a pandas DataFrame.

    The ``yourapikey`` and ``youremail`` placeholders in `link` are filled in,
    the CSV is read (skipping its first two rows) and, when the link carries a
    usable year and interval, the frame is re-indexed with a datetime index
    that starts on 1 January of that year.

    Parameters
    ----------
    link : str
        Download link, e.g. one returned by `get_nsrdb_download_links`.
    email : str
        Replaces the ``youremail`` placeholder in `link`.
    api_key : str, optional
        Replaces the ``yourapikey`` placeholder. When omitted,
        ``check_api_key()`` supplies it.

    Returns
    -------
    pandas.DataFrame
        The downloaded data. The index is a `DatetimeIndex` when it could be
        built, otherwise whatever `pandas.read_csv` produced.

    Raises
    ------
    ValueError
        If `link` is not a ``str``.

    Warns
    -----
    UserWarning
        If the datetime index could not be set (the link has no ``names`` or
        ``interval`` parameter, either one is not numeric, or the generated
        index does not match the number of rows).
    """

    if not isinstance(link, str):
        raise ValueError(f"Requires a str type. You provided {type(link)} type")

    if api_key is None:
        api_key = check_api_key()

    link = link.replace("yourapikey", api_key)
    link = link.replace("youremail", email)

    # Pull the year and interval out of the query string. Both start as None so
    # that a link without them falls through to the warning below instead of
    # failing on an unbound name.
    year = None
    interval = None
    for param in link.split("?")[-1].split("&"):
        key, _, value = param.partition("=")
        if key == "interval":
            interval = value
        elif key == "names":
            year = value[-4:]  # this will catch only the year even from "tmy-*" names in years

    # Decide on the leap day once, after parsing, so it is applied a single time.
    is_leap_year = year in LEAP_YEARS
    if is_leap_year:
        link = link + "&leap_day=true"

    df = pd.read_csv(link, skiprows=2)

    try:
        if year is None or interval is None:
            raise ValueError("link has no 'names' or 'interval' parameter")

        # int() raises ValueError for names such as "tmy" or a non-numeric interval
        int(year)
        step = int(interval)
        if step <= 0:
            raise ValueError("interval must be a positive number of minutes")

        total_mins_in_year = (366 if is_leap_year else 365) * 24 * 60

        # periods has to be an integer, hence floor division
        index = pd.date_range(f"1/1/{year}", freq=f"{step}min", periods=total_mins_in_year // step)

        # set_index raises ValueError when the index length does not match the data
        df = df.set_index(index)

    except ValueError:
        warnings.warn("Could not set the index to datetime; please do it manually", UserWarning)

    return df

Once we have the available links, we can download the data of interest.

Here, we will download from the first link.

In [None]:
# The email replaces the `youremail` placeholder in the link; use your own address here.
df = download_nsrdb_data(links[0], email="sarthakjariwala1@gmail.com")

In [None]:
# List the columns present in the downloaded data.
df.columns

Index(['Year', 'Month', 'Day', 'Hour', 'Minute', 'Temperature', 'Clearsky DHI',
       'Clearsky DNI', 'Clearsky GHI', 'Cloud Type', 'Dew Point', 'DHI', 'DNI',
       'Fill Flag', 'GHI', 'Relative Humidity', 'Solar Zenith Angle',
       'Surface Albedo', 'Pressure', 'Precipitable Water', 'Wind Direction',
       'Wind Speed', 'Global Horizontal UV Irradiance (280-400nm)',
       'Global Horizontal UV Irradiance (295-385nm)'],
      dtype='object')

In [None]:
# Preview a handful of the downloaded columns.
df[["Temperature", "GHI", "Pressure", "Wind Speed", "Precipitable Water"]]

Unnamed: 0,Temperature,GHI,Pressure,Wind Speed,Precipitable Water
2016-01-01 00:00:00,-1,36,1010,0.9,0.430
2016-01-01 00:30:00,-1,0,1010,0.8,0.423
2016-01-01 01:00:00,-1,0,1010,0.8,0.416
2016-01-01 01:30:00,-1,0,1010,0.7,0.411
2016-01-01 02:00:00,-2,0,1010,0.7,0.406
...,...,...,...,...,...
2016-12-31 21:30:00,1,272,990,1.2,1.062
2016-12-31 22:00:00,1,237,990,1.3,1.090
2016-12-31 22:30:00,1,192,990,1.3,1.082
2016-12-31 23:00:00,1,140,990,1.3,1.075


The datetime index of the returned `DataFrame` automatically accounts for leap years, so February 2016 runs through the 29th.

In [None]:
# Select February 2016 through the datetime index; 2016 is a leap year, so the
# last rows should fall on the 29th.
feb = df.loc["2016-02"][["Year", "Month", "Day", "Minute"]]
feb.tail()

Unnamed: 0,Year,Month,Day,Minute
2016-02-29 21:30:00,2016,2,29,30
2016-02-29 22:00:00,2016,2,29,0
2016-02-29 22:30:00,2016,2,29,30
2016-02-29 23:00:00,2016,2,29,0
2016-02-29 23:30:00,2016,2,29,30
