In [1]:
# Notebook-level settings.
# NOTE(review): none of these three names is read by the cells visible in this
# part of the notebook; the descriptions below are inferred from the names only
# and should be confirmed against the cells that use them.
testfolder = 'TEMP'   # presumably the working/output folder name - TODO confirm
exampleflag = False   # presumably toggles an example run - TODO confirm
debugflag = False     # presumably toggles extra debug output - TODO confirm

In [2]:
import numpy as np
import pandas as pd
import datetime
import pprint
import os

In [3]:
# Report the platform and library versions; handy when debugging or asking for support.
import platform
import sys

# Each entry is printed as "<label> <value>", one line per entry.
environment_report = [
    ("Working on a ", " ".join([platform.system(), platform.release()])),
    ("Python version ", sys.version),
    ("Pandas version ", pd.__version__),
]
for label, value in environment_report:
    print(label, value)


Working on a  Windows 10
Python version  3.12.4 | packaged by Anaconda, Inc. | (main, Jun 18 2024, 15:03:56) [MSC v.1929 64 bit (AMD64)]
Pandas version  2.2.2


In [4]:
def updateSRRL(srrlfile='bifacial_MIDC_2019.txt', interval='15T', start=None, end=None):
    '''
    Bring a local SRRL BMS data file up to date and return the merged data.

    Reads ``srrlfile`` when it exists, downloads the requested period with
    ``_getSRRL``, averages stored and new data to ``interval`` with
    ``_averageSRRL``, merges both, writes the result back to ``srrlfile``
    and returns it.

    Parameters
    ----------
    srrlfile : str
        CSV file with previously saved data (default: bifacial_MIDC_2019.txt).
        It is created when it does not exist yet.
    interval : str
        pandas resampling frequency handed to ``_averageSRRL``.
    start : str, optional
        Start date YYYYMMDD to manually override. Defaults to the date of the
        newest row stored in ``srrlfile``, or 20190101 when nothing is stored.
    end : str, optional
        End date YYYYMMDD to manually override. Defaults to today.

    Returns
    -------
    pandas.DataFrame
        Stored plus newly downloaded data, de-duplicated on the timestamp
        (the stored row wins) and sorted by time.

    Notes
    -----
    * SRRL API - limited to 211 days. Described here:
      https://midcdmz.nrel.gov/apps/data_api_doc.pl?BMS
      https://midc.nrel.gov/apps/data_api.pl?site=SSSSSSSS&begin=YYYYMMDD&end=YYYYMMDD
      One call of this function therefore downloads at most 200 days; call it
      again to fetch the remainder of a longer period.
    * The download returns EVERYTHING and takes a long time.
    * SRRL changed data format in 2020: query 2019 and 2020 data in SEPARATE
      queries.
    '''
    import pandas as pd

    max_span_days = 200  # largest period requested in one call

    try:
        srrldata = pd.read_csv(srrlfile, index_col=0)
        srrldata.set_index(pd.to_datetime(srrldata.index), inplace=True)
    except FileNotFoundError:
        # file doesn't exist yet - start from scratch
        srrldata = pd.DataFrame()

    # A header-only file counts as "no history" as well.
    have_history = len(srrldata.index) > 0

    if start is None:
        if have_history:
            # resume at the most recent data point in the file
            start = pd.to_datetime(srrldata.index.max()).strftime('%Y%m%d')
        else:
            start = pd.to_datetime('2019-01-01').strftime('%Y%m%d')

    if end is None:
        end = pd.Timestamp.now().strftime('%Y%m%d')  # today

    start_ts = pd.to_datetime(start)
    if (pd.to_datetime(end) - start_ts).days >= max_span_days:
        # Only the first chunk is fetched; tell the user instead of
        # truncating the request silently.
        end = (start_ts + pd.to_timedelta(max_span_days, unit='D')).strftime('%Y%m%d')
        print('Requested period is longer than {} days; downloading {} to {} only'.format(
            max_span_days, start, end))

    newdata = _getSRRL(start, end)
    newdata15 = _averageSRRL(newdata, interval)  # averaged to `interval`

    if have_history:
        # An empty frame has nothing to average and would only trigger the
        # helper's "unable to average" warning on the very first run.
        srrldata = _averageSRRL(srrldata, interval)

    # Combine with the existing data and drop repeated timestamps.
    # NOTE(review): keep='first' means a stored row wins over a freshly
    # downloaded row for the same timestamp - confirm this is intended.
    combined = pd.concat([srrldata, newdata15], sort=True)
    combined = combined.loc[~combined.index.duplicated(keep='first')]
    # Keep the file chronological so that the last row really is the newest
    # one, even when `start` was overridden to a date before the stored data.
    combined = combined.sort_index()

    print('Saving to file: {}'.format(srrlfile))
    combined.to_csv(srrlfile)
    return combined
    
    
def _getSRRL(start, end, timeout=120):
    '''
    Download SRRL BMS data from the MIDC data API.

    Parameters
    ----------
    start, end : str
        First and last day to request, in YYYYMMDD format.
    timeout : float, optional
        Seconds ``requests`` waits for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        The columns listed in ``paramlist`` below, indexed by a DatetimeIndex
        named ``datetime`` that is assembled from the ``Year``, ``DOY`` and
        ``MST`` columns of the download.

    Raises
    ------
    requests.HTTPError
        When the server answers with an error status.
    KeyError
        When the download lacks one of the columns in ``paramlist``.
    '''
    import io
    import pandas as pd
    import requests

    site = 'BMS'
    paramlist = ['Direct CHP1-1 [W/m^2]',
            'Diffuse 8-48 (vent) [W/m^2]',
            'Albedo (CMP11)',
            'Global CMP22 (vent/cor) [W/m^2]',
            'Tower Dry Bulb Temp [deg C]',
            'Tower RH [%]',
            'Station Pressure [mBar]',
            'Avg Wind Speed @ 6ft [m/s]',
            'Peak Wind Speed @ 6ft [m/s]',
            'Avg Wind Direction @ 6ft [deg from N]',
            'Opaque Cloud Cover [%]',
            'Total Cloud Cover [%]',
            'Precipitation [mm]',
            'Precipitation (Accumulated) [mm]',
            'Moisture',
            'Snow Depth [cm]',
            'Snow Depth Quality'
            ]

    url = "https://midcdmz.nrel.gov/apps/data_api.pl?site={}&begin={}&end={}".format(site, start, end)
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=timeout)
    # Stop on HTTP errors instead of handing an error page to the CSV parser.
    response.raise_for_status()
    df = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

    # Change Year, DOY and MST into a datetime index.
    date = pd.to_datetime(df.Year, format='%Y') + pd.to_timedelta(df.DOY - 1, unit='d')
    # MST is zero-padded to four characters and parsed as HHMM.
    time = pd.to_datetime(df['MST'].apply('{:0>4}'.format), format='%H%M')
    timestamps = (date
                  + pd.to_timedelta(time.dt.hour, unit='h')
                  + pd.to_timedelta(time.dt.minute, unit='m'))
    df = df.set_index(pd.DatetimeIndex(timestamps, name='datetime'))

    missing = [column for column in paramlist if column not in df.columns]
    if missing:
        raise KeyError('SRRL download is missing expected column(s): {}'.format(missing))

    return df[paramlist]

def _averageSRRL(df, interval='15T'):
    try:
        df = df.resample(interval, closed='right', label='right').mean() #
    except:
        print('Warning - unable to average')
    return df

def cleanSRRL(df):
    '''
    Replace negative readings with 0, leaving the air temperature untouched.

    Every numeric column except 'Tower Dry Bulb Temp [deg C]' has its values
    below zero set to 0. Non-numeric columns (for example text date/time
    columns) are left as they are. NaN values stay NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to clean. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    '''
    temperature = 'Tower Dry Bulb Temp [deg C]'  # may legitimately be negative

    # Comparing text columns with 0 raises TypeError, so only look at numbers.
    numeric_columns = df.select_dtypes(include='number').columns
    clamp_columns = [column for column in numeric_columns if column != temperature]

    if clamp_columns:
        readings = df[clamp_columns]
        df[clamp_columns] = readings.mask(readings < 0, 0)
    return df

def fillYear(df, freq):
    '''
    Pad a data set with zero rows so that it spans complete calendar years.

    The data are averaged to ``freq`` with ``_averageSRRL``. The first and
    last averaged rows are set to 0, zero rows are added at January 1st 00:00
    of the first year and at the last ``freq`` step of December 31st of the
    last year, and the frame is then resampled to ``freq`` with forward fill.
    The stretches before and after the data therefore contain zeros; gaps
    inside the data repeat the previous row.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with a DatetimeIndex (tz-aware or naive).
    freq : str or None
        pandas frequency string of fixed length ('15min', '60min', 'h', ...).
        ``None`` infers it from the index and falls back to 15 minutes.

    Returns
    -------
    pandas.DataFrame
        A new frame covering whole years at ``freq`` resolution.
    '''
    import pandas as pd
    from pandas.tseries.frequencies import to_offset

    if freq is None:
        try:
            freq = pd.infer_freq(df.index)
        except (TypeError, ValueError):
            freq = None
        if freq is None:
            freq = '15min'  # 15 minute data by default

    # The minute alias 'T' is deprecated since pandas 2.2; 'min' always works.
    if isinstance(freq, str) and freq.endswith('T') \
            and (freq == 'T' or freq[:-1].isdigit()):
        freq = freq[:-1] + 'min'

    # Length of one step; works for any fixed frequency, not only 'NNT'.
    step = pd.Timedelta(to_offset(freq))

    # apply correct TZ info (if applicable)
    tzinfo = df.index.tzinfo
    first_year = int(df.index.year[0])
    last_year = int(df.index.year[-1])
    starttime = pd.Timestamp(year=first_year, month=1, day=1).tz_localize(tzinfo)
    # last step of the year, e.g. Dec 31st 23:45 for 15-minute data
    endtime = (pd.Timestamp(year=last_year + 1, month=1, day=1) - step).tz_localize(tzinfo)

    # Copy so the caller's frame is never modified, even if averaging failed
    # and the helper handed the input back.
    df2 = _averageSRRL(df, freq).copy()
    df2.iloc[0] = 0  # set first datapt to zero to forward fill w zeros
    df2.iloc[-1] = 0  # set last datapt to zero to forward fill w zeros
    df2.loc[starttime] = 0
    df2.loc[endtime] = 0
    # Resampler.pad() was removed in pandas 2.0; ffill() is its replacement.
    df2 = df2.resample(freq).ffill()  # add zeroes before and after data series
    return df2
    
def saveSAM_SRRLWeatherFile_SUNKITTY(srrl15, savefile='Bifacial_SAMfileAll2019_15.csv', sunkitty='sunkitty_albedo_1', includeminute = True):
    """
    Write SRRL weather data to a SAM-style CSV, taking albedo from a chosen column.

    Parameters
    ----------
    srrl15 : pandas.DataFrame
        Weather data with a DatetimeIndex and the SRRL wind, temperature and
        irradiance columns used below.
    savefile : str
        Path of the CSV file to write.
    sunkitty : str
        Name of the column in ``srrl15`` that is written as ``Albedo``.
    includeminute : bool
        Whether a ``Minute`` column is written after ``Hour``. Per the
        original notes: without minutes SAM assumes TMY3 format and
        calculates the sun position 30 minutes prior to the hour; with
        minutes it uses the timestamp itself. Include minutes whenever the
        data are not hourly.

    File layout
    -----------
    Two location lines:
        Source, Location ID, City, State, Country, Latitude, Longitude,
        Time Zone, Elevation
    followed by the data table:
        Year, Month, Day, Hour, [Minute,] Wspd, Tdry, DHI, DNI, GHI, Albedo
    """

    import pandas as pd

    header = "Source,Location ID,City,State,Country,Latitude,Longitude,Time Zone,Elevation,,,,,,,,,,\n" + \
            "Measured,724666,DENVER/CENTENNIAL [GOLDEN - NREL],CO,USA,39.742,-105.179,-7,1829,,,,,,,,,,\n"

    stamps = srrl15.index

    # Time columns first; Minute is optional and must directly follow Hour.
    columns = {'Year': stamps.year, 'Month': stamps.month, 'Day': stamps.day,
               'Hour': stamps.hour}
    if includeminute:
        columns['Minute'] = stamps.minute

    # Measurement columns, in the order SAM expects.
    columns.update({
        'Wspd': srrl15['Avg Wind Speed @ 6ft [m/s]'],
        'Tdry': srrl15['Tower Dry Bulb Temp [deg C]'],
        'DHI': srrl15['Diffuse 8-48 (vent) [W/m^2]'],
        'DNI': srrl15['Direct CHP1-1 [W/m^2]'],
        'GHI': srrl15['Global CMP22 (vent/cor) [W/m^2]'],
        'Albedo': srrl15[sunkitty],
    })
    savedata = pd.DataFrame(columns)

    with open(savefile, 'w', newline='') as ict:
        # Location lines first, then the table without its index.
        ict.write(header)
        savedata.to_csv(ict, index=False)

def saveSAM_SRRLWeatherFile(srrl15, savefile='Bifacial_SAMfileAll2019_15.csv', includeminute = True):
    """
    Write SRRL weather data to a SAM-style CSV file.

    Parameters
    ----------
    srrl15 : pandas.DataFrame
        Weather data with a DatetimeIndex and the SRRL wind, temperature,
        irradiance and 'Albedo (CMP11)' columns used below.
    savefile : str
        Path of the CSV file to write.
    includeminute : bool
        Whether a ``Minute`` column is written after ``Hour``. Per the
        original notes: without minutes SAM assumes TMY3 format and
        calculates the sun position 30 minutes prior to the hour; with
        minutes it uses the timestamp itself. Include minutes whenever the
        data are not hourly.

    File layout
    -----------
    Two location lines:
        Source, Location ID, City, State, Country, Latitude, Longitude,
        Time Zone, Elevation
    followed by the data table:
        Year, Month, Day, Hour, [Minute,] Wspd, Tdry, DHI, DNI, GHI, Albedo
    """

    import pandas as pd

    header = "Source,Location ID,City,State,Country,Latitude,Longitude,Time Zone,Elevation,,,,,,,,,,\n" + \
            "Measured,724666,DENVER/CENTENNIAL [GOLDEN - NREL],CO,USA,39.742,-105.179,-7,1829,,,,,,,,,,\n"

    stamps = srrl15.index

    # Time columns first; Minute is optional and must directly follow Hour.
    columns = {'Year': stamps.year, 'Month': stamps.month, 'Day': stamps.day,
               'Hour': stamps.hour}
    if includeminute:
        columns['Minute'] = stamps.minute

    # Measurement columns, in the order SAM expects.
    columns.update({
        'Wspd': srrl15['Avg Wind Speed @ 6ft [m/s]'],
        'Tdry': srrl15['Tower Dry Bulb Temp [deg C]'],
        'DHI': srrl15['Diffuse 8-48 (vent) [W/m^2]'],
        'DNI': srrl15['Direct CHP1-1 [W/m^2]'],
        'GHI': srrl15['Global CMP22 (vent/cor) [W/m^2]'],
        'Albedo': srrl15['Albedo (CMP11)'],
    })
    savedata = pd.DataFrame(columns)

    with open(savefile, 'w', newline='') as ict:
        # Location lines first, then the table without its index.
        ict.write(header)
        savedata.to_csv(ict, index=False)
        
def modifyAlbedoMonthly(df):
    """
    Replace the measured albedo by monthly and by overall mean values.

    For every calendar month between the first and the last timestamp of
    ``df`` the mean of 'Albedo (CMP11)' is calculated (NaN values are ignored)
    and rounded to two decimals. A mean that is negative or NaN - for example
    because the month holds no valid data - is replaced by 0 and reported with
    a message.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with a DatetimeIndex and an 'Albedo (CMP11)' column. It is not
        modified.

    Returns
    -------
    monthly : pandas.DataFrame
        Copy of ``df`` in which each row carries the albedo of its month.
    yearly : pandas.DataFrame
        Copy of ``df`` in which every row carries one value: the mean of the
        monthly-replaced albedo column, rounded to two decimals.
    albs : list
        The monthly albedo values in chronological order.
    """
    column = 'Albedo (CMP11)'
    df0 = df.copy()
    stamps = df0.index

    albs = []
    if len(stamps) > 0:
        # One integer per calendar month; avoids pandas' renamed 'M'/'ME'
        # frequency aliases and works for tz-aware and naive indexes alike.
        month_key = np.asarray(stamps.year) * 12 + (np.asarray(stamps.month) - 1)
        months = range(int(month_key.min()), int(month_key.max()) + 1)

        for i, key in enumerate(months):
            in_month = month_key == key
            # Series.mean() skips NaN and returns NaN for an empty month.
            albedomonthly = np.round(df0.loc[in_month, column].mean(), 2)

            if albedomonthly < 0 or np.isnan(albedomonthly):  # Check for valid result
                albedomonthly = 0
                print ("Wrong albedo for month ", i, ", monthly albedo set to 0.")

            albs.append(albedomonthly)
            df0.loc[in_month, column] = albedomonthly

    df2 = df0.copy()  # yearly
    df3 = df0.copy()  # monthly
    df2[column] = np.round(df0[column].mean(), 2)

    return df3, df2, albs
      
    
def save_TMY3(srrl15, savefile='Bifacial_TMYfileAll2019_15.csv'):
    """
    Save a dataframe with weather data from SRRL in a TMY3-style CSV file.

    Dates and times are derived from the index of ``srrl15``: dates are
    written as M/D/YYYY without zero padding and times as HH:MM on the 0-23
    hour clock of the index. No rows are dropped. Use ``new_save_TMY3`` when
    the frame already carries ready-made date and time columns (for example
    in 1-24 hour format).

    Parameters
    ----------
    srrl15 : pandas.DataFrame
        Weather data with a DatetimeIndex and the SRRL wind, temperature,
        irradiance and 'Albedo (CMP11)' columns used below.
    savefile : str
        Path of the CSV file to write.

    File layout
    -----------
    One location line:
        Location ID, City, State, Time Zone, Latitude, Longitude, Elevation
    followed by the data table:
        Date (MM/DD/YYYY), Time (HH:MM), Wspd (m/s), Dry-bulb (C),
        DHI (W/m^2), DNI (W/m^2), GHI (W/m^2), Alb (unitless)
    """

    import pandas as pd

    header = "724666, DENVER/CENTENNIAL [GOLDEN - NREL], CO, -7, 39.742,-105.179, 1829\n"

    stamps = srrl15.index
    # Built by hand: the strftime flags for "no zero padding" differ between
    # platforms ('%#m' on Windows, '%-m' elsewhere).
    dates = ['{}/{}/{}'.format(month, day, year)
             for month, day, year in zip(stamps.month, stamps.day, stamps.year)]
    hours = stamps.strftime('%H:%M')

    savedata = pd.DataFrame({'Date (MM/DD/YYYY)':dates,
                             'Time (HH:MM)':hours,
                             'Wspd (m/s)':srrl15['Avg Wind Speed @ 6ft [m/s]'],
                             'Dry-bulb (C)':srrl15['Tower Dry Bulb Temp [deg C]'],
                             'DHI (W/m^2)':srrl15['Diffuse 8-48 (vent) [W/m^2]'],
                             'DNI (W/m^2)':srrl15['Direct CHP1-1 [W/m^2]'],
                             'GHI (W/m^2)':srrl15['Global CMP22 (vent/cor) [W/m^2]'],
                             'Alb (unitless)':srrl15['Albedo (CMP11)']})

    with open(savefile, 'w', newline='') as ict:
        # Location line first, then the table without its index.
        ict.write(header)
        savedata.to_csv(ict, index=False)
        
def new_save_TMY3(srrl15, savefile='Bifacial_TMYfileAll2019_15.csv', includeTrackerData=False):
    """
    Save SRRL weather data in a TMY3-style CSV, using ready-made date/time columns.

    Unlike ``save_TMY3`` this routine does not derive dates and times from
    the index: ``srrl15`` must already contain the columns
    'Date (MM/DD/YYYY)' and 'Time (HH:MM)' in the desired (1-24 hour) format,
    for example joined in from a real TMY3 CSV beforehand.

    Parameters
    ----------
    srrl15 : pandas.DataFrame
        Weather data holding the date/time columns named above plus the SRRL
        wind, temperature, irradiance and 'Albedo (CMP11)' columns.
    savefile : str
        Path of the CSV file to write.
    includeTrackerData : bool
        When True the 'Tracker Angle (degrees)' column of ``srrl15`` is
        written as an additional last column.

    File layout
    -----------
    One location line:
        Location ID, City, State, Time Zone, Latitude, Longitude, Elevation
    followed by the data table:
        Date (MM/DD/YYYY), Time (HH:MM), Wspd (m/s), Dry-bulb (C),
        DHI (W/m^2), DNI (W/m^2), GHI (W/m^2), Alb (unitless)
        [, Tracker Angle (degrees)]
    """

    import pandas as pd

    header = "724666, DENVER/CENTENNIAL [GOLDEN - NREL], CO, -7, 39.742,-105.179, 1829\n"

    # (output column, source column) in the order they are written.
    column_pairs = [
        ('Date (MM/DD/YYYY)', 'Date (MM/DD/YYYY)'),
        ('Time (HH:MM)', 'Time (HH:MM)'),
        ('Wspd (m/s)', 'Avg Wind Speed @ 6ft [m/s]'),
        ('Dry-bulb (C)', 'Tower Dry Bulb Temp [deg C]'),
        ('DHI (W/m^2)', 'Diffuse 8-48 (vent) [W/m^2]'),
        ('DNI (W/m^2)', 'Direct CHP1-1 [W/m^2]'),
        ('GHI (W/m^2)', 'Global CMP22 (vent/cor) [W/m^2]'),
        ('Alb (unitless)', 'Albedo (CMP11)'),
    ]
    savedata = pd.DataFrame({target: srrl15[source] for target, source in column_pairs})

    if includeTrackerData:
        savedata['Tracker Angle (degrees)'] = srrl15['Tracker Angle (degrees)']

    with open(savefile, 'w', newline='') as ict:
        # Location line first, then the table without its index.
        ict.write(header)
        savedata.to_csv(ict, index=False)

In [5]:
# https://stackoverflow.com/questions/34966422/remove-leap-year-day-from-pandas-dataframe/34966636
def is_leap_and_29Feb(s):
    """
    Flag the rows of ``s`` whose index falls on February 29th of a leap year.

    ``s`` is any pandas object whose index exposes ``year``, ``month`` and
    ``day``. Returns a boolean mask with one entry per row.
    """
    years = s.index.year
    # Gregorian rule: divisible by 4, and not a century unless divisible by 400.
    divisible_by_4 = years % 4 == 0
    century_rule_ok = (years % 100 != 0) | (years % 400 == 0)
    on_feb_29 = (s.index.month == 2) & (s.index.day == 29)
    return divisible_by_4 & century_rule_ok & on_feb_29

In [29]:
# Download SRRL data, average it to `interval` and store it in `srrlfile`.
# If the file already exists it is extended up to the hard-coded end date;
# otherwise a new file is created from the hard-coded start/end dates below.
# The intermediate frames (srrldata, newdata, newdata15, combined) stay
# available as notebook globals afterwards.
srrlfile = r'bifacial_MIDC_2024_60min.txt'
interval = '60min'

if os.path.isfile(srrlfile):
    srrldata = pd.read_csv(srrlfile, index_col =0)
    srrldata.set_index(pd.to_datetime(srrldata.index), inplace=True)   
    start = pd.to_datetime(srrldata.index[-1]).strftime('%Y%m%d')  # start at the most recent data point in the file
    end = pd.to_datetime('20241125').strftime('%Y%m%d')   # The API rejects requests that cover too many days ("too much data"). Do 180 days or less.
    newdata = _getSRRL(start,end)
    srrldata = _averageSRRL(srrldata, interval)
    newdata15 = _averageSRRL(newdata, interval=interval) # averaged to `interval` (60 min here, despite the "15" in the name)

    # combine with existing srrldata
    combined = pd.concat([srrldata,newdata15],sort=True) #.drop_duplicates()
    # de-dupe the data: for a repeated timestamp the row already in the file is kept
    combined = combined.loc[~combined.index.duplicated(keep='first')]

    print('Saving to file: {}'.format(srrlfile))
    combined.to_csv(srrlfile)
    print(end)
else:  # Year not saved yet, make new file
    start = pd.to_datetime('20240101').strftime('%Y%m%d') 
    end = pd.to_datetime('20240601').strftime('%Y%m%d') 
    newdata = _getSRRL(start,end)
    newdata15 = _averageSRRL(newdata, interval=interval) # averaged to `interval` (60 min here, despite the "15" in the name)
    print('Saving to file: {}'.format(srrlfile))
    newdata15.to_csv(srrlfile)

Saving to file: bifacial_MIDC_2024_60min.txt
20241125
