In [1]:
import time
import numpy as np
import pandas as pd
from datetime import datetime, timezone

In [2]:
locations = pd.read_csv("../Metadata/Station_Locations.csv")
locations.head(3)

Unnamed: 0,Station ID,Site Name,Address,Site Type,Station Type,Longitude,Latitude,Start Date
0,AAC,Allen AME Church,Allen AME Chruch,Church,AWS,-76.636494,39.29136,5/19/2023
1,ABL,Abell,"3202 Barclay St, Baltimore",Private Residence,AWS,-76.611476,39.327251,8/8/2023
2,AGC,Amazing Grace Church,"2424 McElderry St, Baltimore",Church,AWS,-76.582858,39.298248,8/8/2023


In [26]:
def BsecHasUsableData( file, targetTime, minTime ):
    
    df = pd.read_csv(file)
    df['obsTimeUtc'] = pd.to_datetime( df['obsTimeUtc'], utc=True )
    filtered_df = df[ (df['obsTimeUtc'] > minTime) & (df['obsTimeUtc'] <= targetTime) ] 

    # Check if filtered_df is empty before trying to access last element
    if not filtered_df.empty:
    
        last_element = filtered_df['obsTimeUtc'].iloc[-1]
        row = df[df['obsTimeUtc'] == last_element]
        
        # check quality control, 1 = good data
        qc = row['qcStatus'].iloc[0]
        if ( qc != 1 ):
            return -1, -1
        else:
            
            # sum over axis=1 means sum across row and not down a column
            nulls = int( row.isnull().sum(axis=1) )
            return nulls, row
    
    else:
            
        return -1, -1

In [33]:
outDir = "../Results/"

# 1-hour before Landsat measurements
outFiles = [outDir + 'BSEC_Data_1Hour_Prior_07_14_2023.csv',
            outDir + 'BSEC_Data_1Hour_Prior_07_30_2023.csv',
            outDir + 'BSEC_Data_1Hour_Prior_08_23_2023.csv',
            outDir + 'BSEC_Data_1Hour_Prior_08_31_2023.csv']
weatherTimes = [datetime(2023,7,14,15,46, tzinfo=timezone.utc),
                datetime(2023,7,30,15,46, tzinfo=timezone.utc),
                datetime(2023,8,23,15,46, tzinfo=timezone.utc),
                datetime(2023,8,31,15,46, tzinfo=timezone.utc)]
weatherTimesMin = [datetime(2023,7,14,14,46, tzinfo=timezone.utc),
                   datetime(2023,7,30,14,46, tzinfo=timezone.utc),
                   datetime(2023,8,23,14,46, tzinfo=timezone.utc),
                   datetime(2023,8,31,14,46, tzinfo=timezone.utc)]

In [34]:
# loop over days/times we need
for ix, weatherTime in enumerate(weatherTimes):
    
    print("Working on:", outFiles[ix])
    print('Target Time:', weatherTime)
    print('Minimum Time:', weatherTimesMin[ix])
    print()
    
    # open output file
    f = open(outFiles[ix], 'w')
    line =  'stationID,tz,obsTimeUtc,obsTimeLocal,epoch,bsecLat,bsecLon,solarRadiationHigh,'
    line += 'uvHigh,winddirAvg,humidityHigh,humidityLow,humidityAvg,qcStatus,tempHigh,'
    line += 'tempLow,tempAvg,windspeedHigh,windspeedLow,windspeedAvg,windgustHigh,windgustLow,'
    line += 'windgustAvg,dewptHigh,dewptLow,dewptAvg,windchillHigh,windchillLow,windchillAvg,'
    line += 'heatindexHigh,heatindexLow,heatindexAvg,pressureMax,pressureMin,pressureTrend,'
    line += 'precipRate,precipTotal\n'
    f.write( line )
    
    # loop over all AWS BSEC stations
    distances = {}
    for ix2, row2 in locations.iterrows():

        site = row2['Station ID'] 
        typ = row2['Station Type']
        if ( typ == 'AWS' ):
            
            line = ''
            bsecFile = '../BSEC/BSEC-' + site + '_AWS_hourly_2023.csv'
            nulls, row = BsecHasUsableData( bsecFile, weatherTime, weatherTimesMin[ix] )
                        
            # if station has data at this time then write it to output file
            if ( nulls == 0 ):
                line += site + "," 
                line += row['tz'].iloc[0] + "," 
                line += str(row['obsTimeUtc'].iloc[0]) + "," 
                line += str(row['obsTimeLocal'].iloc[0]) + "," 
                line += str(row['epoch'].iloc[0]) + "," 
                line += str(row['lat'].iloc[0]) + "," 
                line += str(row['lon'].iloc[0]) + "," 
                line += str(row['solarRadiationHigh'].iloc[0]) + "," 
                line += str(row['uvHigh'].iloc[0]) + "," 
                line += str(row['winddirAvg'].iloc[0]) + "," 
                line += str(row['humidityHigh'].iloc[0]) + "," 
                line += str(row['humidityLow'].iloc[0]) + "," 
                line += str(row['humidityAvg'].iloc[0]) + "," 
                line += str(row['qcStatus'].iloc[0]) + "," 
                line += str(row['tempHigh'].iloc[0]) + "," 
                line += str(row['tempLow'].iloc[0]) + "," 
                line += str(row['tempAvg'].iloc[0]) + "," 
                line += str(row['windspeedHigh'].iloc[0]) + "," 
                line += str(row['windspeedLow'].iloc[0]) + "," 
                line += str(row['windspeedAvg'].iloc[0]) + "," 
                line += str(row['windgustHigh'].iloc[0]) + "," 
                line += str(row['windgustLow'].iloc[0]) + "," 
                line += str(row['windgustAvg'].iloc[0]) + "," 
                line += str(row['dewptHigh'].iloc[0]) + "," 
                line += str(row['dewptLow'].iloc[0]) + "," 
                line += str(row['dewptAvg'].iloc[0]) + "," 
                line += str(row['windchillHigh'].iloc[0]) + "," 
                line += str(row['windchillLow'].iloc[0]) + "," 
                line += str(row['windchillAvg'].iloc[0]) + "," 
                line += str(row['heatindexHigh'].iloc[0]) + "," 
                line += str(row['heatindexLow'].iloc[0]) + "," 
                line += str(row['heatindexAvg'].iloc[0]) + "," 
                line += str(row['pressureMax'].iloc[0]) + "," 
                line += str(row['pressureMin'].iloc[0]) + "," 
                line += str(row['pressureTrend'].iloc[0]) + "," 
                line += str(row['precipRate'].iloc[0]) + "," 
                line += str(row['precipTotal'].iloc[0]) + "\n"
                f.write( line )

    f.close()

Working on: ../Results/BSEC_Data_1Hour_Prior_07_14_2023.csv
Target Time: 2023-07-14 15:46:00+00:00
Minimum Time: 2023-07-14 14:46:00+00:00



  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )


Working on: ../Results/BSEC_Data_1Hour_Prior_07_30_2023.csv
Target Time: 2023-07-30 15:46:00+00:00
Minimum Time: 2023-07-30 14:46:00+00:00

Working on: ../Results/BSEC_Data_1Hour_Prior_08_23_2023.csv
Target Time: 2023-08-23 15:46:00+00:00
Minimum Time: 2023-08-23 14:46:00+00:00



  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )


Working on: ../Results/BSEC_Data_1Hour_Prior_08_31_2023.csv
Target Time: 2023-08-31 15:46:00+00:00
Minimum Time: 2023-08-31 14:46:00+00:00



  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )
  nulls = int( row.isnull().sum(axis=1) )


In [38]:
test = pd.read_csv(outFiles[3])  
test.shape

(7, 37)

In [39]:
test.head(2)

Unnamed: 0,stationID,tz,obsTimeUtc,obsTimeLocal,epoch,bsecLat,bsecLon,solarRadiationHigh,uvHigh,winddirAvg,...,windchillLow,windchillAvg,heatindexHigh,heatindexLow,heatindexAvg,pressureMax,pressureMin,pressureTrend,precipRate,precipTotal
0,AAC,America/New_York,2023-08-31 14:59:00+00:00,2023-08-31 10:59:00,1693493940,39.291284,-76.636279,38.6,0.0,102.0,...,20.8,21.1,21.2,20.6,20.8,1003.12,1002.1,0.62,0.0,0.0
1,ABL,America/New_York,2023-08-31 14:58:00+00:00,2023-08-31 10:58:00,1693493880,39.327258,-76.611422,605.7,5.0,287.0,...,21.0,22.4,22.5,20.8,22.0,1017.51,1016.59,0.53,0.0,0.0
