In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
#-------------------------------------------------------------------#
#Fetching Data From NSRDB Database
#-------------------------------------------------------------------#

In [3]:
#NSRDB = National Solar Radiation Data Base

#NSRDB Database
#https://rredc.nrel.gov/solar/old_data/nsrdb/1991-2005/tmy3/

#Getting the NSRDB Key/API instructions
#https://nsrdb.nrel.gov/api-instructions

In [10]:
# Declare all variables as strings. Spaces must be replaced with '+', i.e., change 'John Smith' to 'John+Smith'.

# Define the lat, long of the location (the year is set once, as a string, further below)
#lat, lon = 42.3505, -71.1054 #BU CAS Roof
lat, lon = 35.0844, -106.6504 #Albuquerque
# You must request an NSRDB api key from the link above.
# The key is read from the NSRDB_API_KEY environment variable when it is set; the literal is only a
# fallback so the notebook keeps running unchanged.
# FIXME: a key stored in a shared notebook should be treated as leaked -- rotate it and drop the fallback.
api_key = os.environ.get('NSRDB_API_KEY', 'qcS1rONVybE6Gtw1heDE0dFatuaHBmoVozd1weZX')
# Set the attributes to extract (e.g., dhi, ghi, etc.), separated by commas.
attributes = 'ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle'
# Choose year of data (kept as a string because it is interpolated into the request URL)
year = '2017'
# Set leap year to true or false. True will return leap day data if present, false will not.
leap_year = 'false'
# Set time interval in minutes, i.e., '30' is half hour intervals. Valid intervals are 30 & 60.
interval = '60'
# Specify Coordinated Universal Time (UTC), 'true' will use UTC, 'false' will use the local time zone of the data.
# NOTE: In order to use the NSRDB data in SAM, you must specify UTC as 'false'. SAM requires the data to be in the
# local time zone.
utc = 'false'
# Your full name, use '+' instead of spaces.
your_name = 'Aditya+Wikara'
# Your reason for using the NSRDB.
reason_for_use = 'academic+research'
# Your affiliation
your_affiliation = 'Boston+University'
# Your email address
your_email = 'adwikara@bu.edu'
# Please join our mailing list so we can keep you up-to-date on new developments.
mailing_list = 'false'

# Declare url string.
# https is used so the api key and email address in the query string are not sent in clear text.
url = (
    'https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv'
    '?wkt=POINT({lon}%20{lat})&names={year}&leap_day={leap}&interval={interval}'
    '&utc={utc}&full_name={name}&email={email}&affiliation={affiliation}'
    '&mailing_list={mailing_list}&reason={reason}&api_key={api}&attributes={attr}'
).format(
    year=year, lat=lat, lon=lon, leap=leap_year, interval=interval, utc=utc,
    name=your_name, email=your_email, mailing_list=mailing_list,
    affiliation=your_affiliation, reason=reason_for_use, api=api_key, attr=attributes,
)
# Read only the header line plus one row (nrows=1): together they hold the metadata
info = pd.read_csv(url, nrows=1)
# See metadata for specified properties, e.g., timezone and elevation
# (each is a one-element pandas Series because `info` is a one-row dataframe)
timezone, elevation = info['Local Time Zone'], info['Elevation']

In [5]:
# View metadata
info

Unnamed: 0,Source,Location ID,City,State,Country,Latitude,Longitude,Time Zone,Elevation,Local Time Zone,...,Cloud Type 11,Cloud Type 12,Fill Flag 0,Fill Flag 1,Fill Flag 2,Fill Flag 3,Fill Flag 4,Fill Flag 5,Surface Albedo Units,Version
0,NSRDB,94139,-,-,-,35.09,-106.66,-7,1496,-7,...,Dust,Smoke,,Missing Image,Low Irradiance,Exceeds Clearsky,Missing CLoud Properties,Rayleigh Violation,,v3.0.0


In [8]:
# Return all but first 2 lines of csv to get data:
df = pd.read_csv(url, skiprows=2)

# Set the time index in the pandas dataframe:
# 525600 is the number of minutes in a 365-day year (365 * 24 * 60).
# Integer division keeps `periods` an int: pd.date_range expects an integer count and
# recent pandas releases deprecate float values such as 8760.0.
df = df.set_index(pd.date_range('1/1/{yr}'.format(yr=year), freq=interval+'Min', periods=525600//int(interval)))

# take a look
print('shape:',df.shape)
#Look at 10 data rows from the last row
#(a bare df.head(10) placed before this line would not be rendered: only the final expression of a cell is shown)
df.tail(10)

#Write to csv file
#df.to_csv('./processed_files/albuquerque_weather_file.csv')

shape: (8760, 11)


Unnamed: 0,Year,Month,Day,Hour,Minute,GHI,DHI,DNI,Wind Speed,Temperature,Solar Zenith Angle
2017-12-31 14:00:00,2017,12,31,14,30,288,158,330,0.7,15,66.93
2017-12-31 15:00:00,2017,12,31,15,30,176,97,304,0.7,12,74.98
2017-12-31 16:00:00,2017,12,31,16,30,50,33,186,0.9,11,84.64
2017-12-31 17:00:00,2017,12,31,17,30,0,0,0,1.1,4,95.36
2017-12-31 18:00:00,2017,12,31,18,30,0,0,0,1.2,2,106.84
2017-12-31 19:00:00,2017,12,31,19,30,0,0,0,1.3,1,118.8
2017-12-31 20:00:00,2017,12,31,20,30,0,0,0,1.4,0,131.0
2017-12-31 21:00:00,2017,12,31,21,30,0,0,0,1.4,0,143.24
2017-12-31 22:00:00,2017,12,31,22,30,0,0,0,1.4,0,155.08
2017-12-31 23:00:00,2017,12,31,23,30,0,0,0,1.3,0,165.05


In [7]:
#-------------------------------------------------------------------#
#Show All Attributes of NSRDB Data
#-------------------------------------------------------------------#

In [8]:
# Same request as `url` but without the `attributes` parameter.
# https is used so the api key and email address in the query string are not sent in clear text.
url2 = (
    'https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv'
    '?wkt=POINT({lon}%20{lat})&names={year}&leap_day={leap}&interval={interval}'
    '&utc={utc}&full_name={name}&email={email}&affiliation={affiliation}'
    '&mailing_list={mailing_list}&reason={reason}&api_key={api}'
).format(
    year=year, lat=lat, lon=lon, leap=leap_year, interval=interval, utc=utc,
    name=your_name, email=your_email, mailing_list=mailing_list,
    affiliation=your_affiliation, reason=reason_for_use, api=api_key,
)
# Return all but first 2 lines of csv to get data:
dff = pd.read_csv(url2, skiprows=2)
# List the columns of the frame just downloaded (dff), not of the attribute-restricted frame df
att = dff.columns.values
print(att)

['Year' 'Month' 'Day' 'Hour' 'Minute' 'Temperature' 'Clearsky DHI'
 'Clearsky DNI' 'Clearsky GHI' 'Cloud Type' 'Dew Point' 'DHI' 'DNI'
 'Fill Flag' 'GHI' 'Relative Humidity' 'Solar Zenith Angle'
 'Surface Albedo' 'Pressure' 'Precipitable Water' 'Wind Direction'
 'Wind Speed']


In [9]:
#-------------------------------------------------------------------#
#Extract data from NSRDB database based on desired day
#-------------------------------------------------------------------#

In [221]:
import datetime as dt
#Get the nth day of the year
def get_day(year=2017, month=4, day=2):
    """Return the 1-based ordinal day of the year for a calendar date.

    The defaults reproduce the date that used to be hard-coded here (2 April 2017),
    so ``get_day()`` with no arguments still returns 92.

    Args:
        year: Calendar year; an int or a numeric string such as '2017'.
        month: Month number, 1-12.
        day: Day of the month.

    Returns:
        int: 1 for 1 January, up to 365 (366 in a leap year) for 31 December.

    Raises:
        ValueError: If the values do not form a valid calendar date.
    """
    # int() lets callers pass string values (the notebook stores `year` as a string)
    target = dt.date(int(year), int(month), int(day))
    return target.timetuple().tm_yday

#Get the start and end index for the NSRDB dataframe
def get_today_index(days,interval):
    """Return the positional (start, end) row range of one day in the annual dataframe.

    Args:
        days: 1-based ordinal day of the year (e.g. the value returned by get_day()).
        interval: Sampling interval in minutes, '60' or '30' (an int is accepted too).

    Returns:
        tuple: (day_start, day_end), a half-open range suitable for df[day_start:day_end].

    Raises:
        ValueError: If the interval is not 30 or 60 minutes. Previously this case printed
            "ERROR" and returned None, which made the caller fail while unpacking the result.
    """
    minutes = int(interval)
    if minutes not in (30, 60):
        raise ValueError("interval must be '30' or '60' minutes, got {!r}".format(interval))
    # 24 rows per day for hourly data, 48 for half-hourly data
    rows_per_day = 24 * 60 // minutes
    day_end = days * rows_per_day
    day_start = day_end - rows_per_day
    return day_start,day_end

#Extract data for a desired day from the annual NSRDB data
days_in_the_year = get_day()
(start,end) = get_today_index(days_in_the_year,interval)
# Keep the positional slice for that day in its own variable so it can be exported
df_today = df.iloc[start:end]

#Write to csv file (only the selected day, not the whole annual dataframe)
df_today.to_csv('./processed_files/today_data.csv')

In [222]:
#-------------------------------------------------------------------#
#Extract annual noon-data from NSRDB database
#-------------------------------------------------------------------#

In [223]:
#Columns kept in the noon-only dataframe
att2 = ["Year","Month","Day","Hour","Minute","GHI","DHI","DNI","Wind Speed","Temperature","Solar Zenith Angle"]

#Get the data from only noon time: rows whose index timestamp falls in hour 12.
#A boolean mask replaces the row-by-row DataFrame.append loop (append was removed in
#pandas 2.0 and copying the frame once per row is quadratic); it also keeps the source dtypes.
noon_rows = df.loc[df.index.hour == 12, att2]

#Formatting of the time/date of the dataframe
convert_dict = {'Year': int, 'Month': int,'Day':int,'Hour':int,'Minute':int} 
df_2 = noon_rows.astype(convert_dict)

#Change the index of the data frame to the timestamp strings, e.g. '2017-01-01 12:00:00'
index_col = [str(stamp) for stamp in df_2.index]
df_2.index = index_col

#Write to csv file
df_2.to_csv('./processed_files/2017_annual_noon_data.csv')

In [224]:
#-------------------------------------------------------------------#
#Extract noon-data from NSRDB database for particular month/months
#-------------------------------------------------------------------#

In [225]:
#Saving noon data for 1 month
#Positional row ranges of each month inside df_2, for reference. They only hold when
#df_2 contains exactly one noon row per day of a 365-day year:
#January:0-31 => df_2[0:31]
#February:31-59 => df_2[31:59]
#March:59-90 => df_2[59:90]
#April:90-120 => df_2[90:120]
#May:120-151 => df_2[120:151]
#June:151-181 => df_2[151:181]
#July:181-212 => df_2[181:212]
#Aug:212-243 => df_2[212:243]
#Sep:243-273 => df_2[243:273]
#Oct:273-304 => df_2[273:304]
#Nov:304-334 => df_2[304:334]
#Dec:334-365 => df_2[334:365]

#Sample for writing data of December 2017
#Select on the Month column so the result does not depend on row positions
df_3 = df_2[df_2['Month'] == 12]

#Write to csv file
df_3.to_csv('./processed_files/dec_2017_noon_data.csv')

In [226]:
#Saving noon data for multiple months
#Sample for writing data of January-April 2017

#Columns kept in the exported dataframe
att2 = ["Year","Month","Day","Hour","Minute","GHI","DHI","DNI","Wind Speed","Temperature","Solar Zenith Angle"]

#Select the January-April rows by Month instead of by position, without DataFrame.append
#(removed in pandas 2.0); .copy() makes df_4 independent of df_2
df_4 = df_2.loc[df_2['Month'].between(1, 4), att2].copy()

#Write to csv file
df_4.to_csv('./processed_files/jan-apr_2017_noon_data.csv')