In [1]:
import os
import pandas as pd
import numpy as np
import pandas as pd
from datetime import datetime
%load_ext autoreload
%autoreload 2
import pandas as pd
from fluxdataqaqc import Data, QaQc, Plot
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.plotting import output_notebook
from bokeh.models.formatters import DatetimeTickFormatter
from bokeh.models import LinearAxis, Range1d
from bokeh.models import HoverTool
from bokeh.io import output_notebook
output_notebook()

In [2]:
# Root folder that holds every dataset used by this notebook.
base_path = "/Users/saraawad/Desktop/Datasets/Google/"
# Folder with the classified hourly Ameriflux CSV files, located under
# "<base_path>Ameriflux/".
hourly_classified_path = os.path.join(
    "".join([base_path, "Ameriflux/"]),
    "Ameriflux Hourly Classified/",
)

In [3]:
class Helpers:
    '''Stateless file and DataFrame utilities shared by the processing classes.

    Every helper is a static method, so both ``Helpers.name(...)`` and
    ``Helpers().name(...)`` work.
    '''

    def __init__(self):
        print("Helper")

    @staticmethod
    def convert_missing_values_nan(df):
        '''Return a copy of ``df`` with the -9999 missing-value marker replaced by NaN.'''
        # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
        return df.replace(-9999.000000, np.nan)

    @staticmethod
    def drop_nan_columns(df):
        '''Drop, in place, every column whose rows are all NaN and return ``df``.

        The date/time and bookkeeping columns listed below are never dropped,
        even when they are entirely NaN.
        '''
        columns_to_exclude = {"Date", "Day", "Year", "Month", "Timestamp start",
                              "Time", "TIMESTAMP", "Tier", "TIMESTAMP_START",
                              "TIMESTAMP_END", "Day Status"}
        empty_columns = [
            col for col in df.columns
            if col not in columns_to_exclude and df[col].isnull().all()
        ]
        # Kept in place so callers that ignore the return value still see the change.
        df.drop(columns=empty_columns, inplace=True)
        return df

    @staticmethod
    def drop_nans_rows(df):
        '''Return ``df`` without the rows that contain at least one NaN.

        The shape is printed before and after the removal.
        '''
        print("Before removing missing values:")
        print("number of rows:", df.shape[0], "\nnumber of columns:", df.shape[1])
        df = df.dropna(how='any')
        print("After removing missing values:")
        print("number of rows:", df.shape[0], "\nnumber of columns:", df.shape[1])
        return df

    @staticmethod
    def get_all_matching_columns(df, keyword):
        '''Return the column labels of ``df`` that contain ``keyword`` as a substring.'''
        return df.filter(like=keyword).columns

    @staticmethod
    def generate_lags(df, column, lags_count):
        '''Add ``lags_count`` shifted copies of ``column`` to ``df`` and return it.

        The i-th lag is stored in a new column named "<column>-<i>"; the rows
        that have no earlier value are left as NaN.
        '''
        for lag in range(1, lags_count + 1):
            df[column + "-" + str(lag)] = df[column].shift(lag)
        return df

    @staticmethod
    def add_LE_conversion_rate(df, col, conversion_rate=28.94):
        '''Add a "<col>(mm)" column holding ``df[col] / conversion_rate``.

        NOTE(review): the "(mm)" suffix suggests a flux-to-water-depth
        conversion; the meaning and units of the 28.94 default are not
        visible here -- confirm before relying on it.
        '''
        df[col + "(mm)"] = df[col] / conversion_rate
        return df

    @staticmethod
    def read_sites_data():
        '''Read "filtered_sites_all.xlsx" from the module-level ``base_path``.'''
        file_path = os.path.join(base_path, "filtered_sites_all.xlsx")
        return pd.read_excel(file_path)

    @staticmethod
    def export_data(df, file_path):
        '''Write ``df`` to "<file_path>.csv" without the index.

        ``file_path`` is joined onto the module-level ``base_path``, so an
        absolute ``file_path`` is used as given.
        '''
        export_path = os.path.join(base_path, file_path + ".csv")
        df.to_csv(export_path, index=False, header=True)

    @staticmethod
    def load_data(file_path):
        '''Read "<file_path>.csv" into a DataFrame.'''
        return pd.read_csv(file_path + ".csv", delimiter=',')

    @staticmethod
    def list_to_df(list_to_convert):
        '''Concatenate a list of DataFrames into one (columns sorted).

        An empty list yields an empty DataFrame instead of the ValueError
        that ``pd.concat`` raises when it has nothing to concatenate.
        '''
        if len(list_to_convert) == 0:
            return pd.DataFrame()
        return pd.concat(list_to_convert, sort=True)

    @staticmethod
    def get_files_directory(dirName):
        '''Recursively list the paths of the files found under ``dirName``.

        Any entry (file or directory) whose name ends with ".xlsx", ".icloud"
        or ".DS_Store" is skipped.
        '''
        skipped_suffixes = (".xlsx", ".icloud", ".DS_Store")
        allFiles = []
        for entry in os.listdir(dirName):
            if entry.endswith(skipped_suffixes):
                continue
            fullPath = os.path.join(dirName, entry)
            if os.path.isdir(fullPath):
                # Descend into sub-directories and collect their files too.
                allFiles.extend(Helpers.get_files_directory(fullPath))
            else:
                allFiles.append(fullPath)
        return allFiles

    @staticmethod
    def concat_dataframe_from_files(files, skipRowsNum, split_num):
        '''Read every ".csv" path in ``files`` and concatenate the results.

        Each frame gets a "Site Id" column taken from the ``split_num``-th
        underscore-separated token of its file name. ``skipRowsNum`` is
        forwarded to ``pd.read_csv`` as ``skiprows``.
        '''
        frames = []
        for file_path in files:
            file_name = os.path.basename(file_path)
            if not file_name.endswith(".csv"):
                continue
            df = pd.read_csv(file_path, delimiter=',', skiprows=skipRowsNum)
            df["Site Id"] = file_name.split("_")[split_num]
            frames.append(df)
        return Helpers.list_to_df(frames)

    @staticmethod
    def generate_dataframe_from_files(dirName, skipRowsNum = 0, split_num = 0):
        '''Load and concatenate every CSV file found (recursively) under ``dirName``.'''
        files = Helpers.get_files_directory(dirName)
        return Helpers.concat_dataframe_from_files(files, skipRowsNum, split_num)
        

In [4]:
class Ameriflux:
    '''Cleans the per-site CSV files found under ``folder_path`` and exports them.

    For every CSV file the pipeline converts the -9999 marker to NaN, drops
    quality-flag / max-wind columns, drops incomplete rows, averages the
    "<BASE>_..." variant columns into a single "<BASE>" column and exports
    the result as "<site id>_Hourly.csv" under ``output_name``.
    '''

    # Base names whose "<BASE>_..." variant columns are merged into "<BASE>".
    _VARIANT_GROUPS = ("WS", "RH", "TA", "G", "H", "NETRAD")

    def __init__(self, folder_path, skipRowsNum, split_num, lags_count, is_hourly, is_joint, output_name):
        '''Store the run settings.

        ``skipRowsNum`` is passed to ``pd.read_csv`` as ``skiprows`` and
        ``split_num`` selects the underscore-separated token of the file name
        used as the site id. ``lags_count``, ``is_hourly`` and ``is_joint``
        are only stored; none of the methods of this class read them.
        '''
        print("Initializer")
        self.folder_path = folder_path
        self.skipRowsNum = skipRowsNum
        self.split_num = split_num
        self.lags_count = lags_count
        self.is_hourly = is_hourly
        self.is_joint = is_joint
        self.output_name = output_name

    @staticmethod
    def _variant_columns(df, base):
        '''Return the columns of ``df`` whose name starts with "<base>_".'''
        # Prefix (not substring) matching: a substring test for "G_" or "H_"
        # would also capture unrelated columns such as "SG_..." or "RH_...".
        prefix = base + "_"
        return [col for col in df.columns if str(col).startswith(prefix)]

    def impute_temperature(self, df):
        '''Replace negative "TA" values with the column mean (or 0 if the mean is negative).

        NaN values are left untouched. A frame without a "TA" column is
        returned unchanged instead of raising KeyError.
        '''
        columns_list = ["TA"]
        for col in columns_list:
            if col not in df.columns:
                continue
            mean_value = df[col].mean()
            # A negative mean falls back to zero; a NaN mean stays NaN.
            fallback = 0.0 if mean_value < 0 else float(mean_value)
            df[col] = np.where(df[col] < 0, fallback, df[col])
        return df

    def transform_input_variants(self, df):
        '''Collapse the variant columns of every input variable, then impute "TA".

        The variant lists are printed before grouping and the resulting
        columns afterwards.
        '''
        variants = {base: self._variant_columns(df, base) for base in self._VARIANT_GROUPS}
        for base, columns in variants.items():
            print(base.lower() + "_list", columns)

        for base, columns in variants.items():
            df = self.group_input_variants(df, columns, base)
        df = self.impute_temperature(df)
        print("After grouping", df.columns)
        return df

    def group_input_variants(self, df, variant_list, mean_column):
        '''Store the row-wise mean of ``variant_list`` in ``mean_column`` and drop the variants.

        With a single variant its values are copied; with none, ``df`` is
        returned without a new column. An existing ``mean_column`` is
        overwritten whenever at least one variant is given.
        '''
        if len(variant_list) > 1:
            df[mean_column] = df[variant_list].mean(axis=1)
        elif len(variant_list) > 0:
            df[mean_column] = df[variant_list[0]]

        return df.drop(variant_list, axis=1)

    def add_LE_converstion(self, df):
        '''Drop every column containing "LE_" and, if "LE" exists, add its converted column.

        The conversion itself is delegated to ``Helpers.add_LE_conversion_rate``.
        '''
        columns_to_drop = list(df.filter(like='LE_').columns)
        # Work on a new frame so the caller's DataFrame is not modified.
        df = df.drop(columns_to_drop, axis=1)
        if "LE" in df.columns:
            df = Helpers.add_LE_conversion_rate(df, "LE")
        return df

    def remove_unneeded_columns_hourly(self, df):
        '''Drop the rows containing NaNs, then apply ``add_LE_converstion``.'''
        df = Helpers.drop_nans_rows(df)
        df = self.add_LE_converstion(df)
        return df

    def drop_invalid_columns(self, df):
        '''Drop the columns containing "_SSITC_TEST" or "WS_MAX".

        They are removed before the variants are grouped so they are not
        averaged together with the measurement columns.
        '''
        ssitc_list = list(Helpers.get_all_matching_columns(df, "_SSITC_TEST"))
        max_list = list(Helpers.get_all_matching_columns(df, "WS_MAX"))
        columnsToDrop = ssitc_list + max_list
        print("columns to drop", columnsToDrop)
        return df.drop(columnsToDrop, axis=1)

    def generate_hourly_data(self, df):
        '''Run the cleaning steps on one site's frame and return the result.'''
        print("df shape:", df.shape)
        df = Helpers.convert_missing_values_nan(df)
        df = self.drop_invalid_columns(df)
        # Remove rows having NaNs
        df = Helpers.drop_nans_rows(df)
        df = self.transform_input_variants(df)
        return df

    def generate_site_data(self, sites_df):
        '''Process every CSV under ``self.folder_path`` and export the usable ones.

        A site is exported only when rows remain after cleaning and an "LE"
        column is present. ``sites_df`` is accepted for interface
        compatibility but is not used.
        '''
        for file_path in Helpers.get_files_directory(self.folder_path):
            file_name = os.path.basename(file_path)
            # Get only the sheets having the variables
            if not file_name.endswith(".csv"):
                continue
            df_filt = pd.read_csv(file_path, delimiter=',', skiprows=self.skipRowsNum)
            site_id = file_name.split("_")[self.split_num]
            df_filt["Site Id"] = site_id
            df_filt = self.generate_hourly_data(df_filt)
            if (len(df_filt) > 0) and ("LE" in df_filt.columns):
                print("Site:", site_id)
                export_name = os.path.join(self.output_name, site_id + "_Hourly")
                Helpers.export_data(df_filt, export_name)

In [5]:
if __name__ == "__main__":
    # Folder that receives the exported "<site id>_Hourly.csv" files.
    output_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "data/")

    # Run settings handed to Ameriflux.
    is_hourly = True   # whether the data is hourly (True) or daily (False)
    skipRowsNum = 0    # leading rows to skip when reading each CSV
    split_num = 0      # index of the file-name token that holds the site id
    lags_count = 5     # number of lags to generate the data for

    am = Ameriflux(
        folder_path=hourly_classified_path,
        skipRowsNum=skipRowsNum,
        split_num=split_num,
        lags_count=lags_count,
        is_hourly=is_hourly,
        is_joint=False,
        output_name=output_name,
    )
    sites_df = Helpers.read_sites_data()
    am.generate_site_data(sites_df)

Initializer
df shape: (144768, 52)
columns to drop ['FC_SSITC_TEST', 'FCH4_SSITC_TEST', 'H_SSITC_TEST', 'LE_SSITC_TEST', 'TAU_SSITC_TEST_PI_F']
Before removing missing values:
number of rows: 144768 
number of columns: 47
After removing missing values:
number of rows: 50155 
number of columns: 47
ws_list []
rh_list ['RH_PI_F']
ta_list []
g_list []
h_list ['H_PI_F']
netrad_list []
After grouping Index(['TIMESTAMP_END', 'CO2', 'H2O', 'CH4', 'FC', 'FCH4', 'G', 'H', 'LE',
       'WD', 'WS', 'USTAR', 'ZL', 'TAU', 'MO_LENGTH', 'V_SIGMA', 'W_SIGMA',
       'PA', 'TA', 'VPD_PI', 'T_SONIC', 'T_SONIC_SIGMA', 'TS_PI_1', 'TS_PI_2',
       'TS_PI_3', 'TS_PI_4', 'TS_PI_5', 'WTD', 'NETRAD', 'PPFD_IN', 'PPFD_OUT',
       'SW_IN', 'P', 'FC_PI_F', 'RECO_PI_F', 'GPP_PI_F', 'FCH4_PI_F',
       'LE_PI_F', 'Site Id', 'Category', 'Year', 'Month', 'Day', 'Date',
       'Timestamp start', 'RH'],
      dtype='object')
Site: US-Twt
df shape: (65700, 28)
columns to drop []
Before removing missing values:
number o

df shape: (52608, 57)
columns to drop []
Before removing missing values:
number of rows: 52608 
number of columns: 57
After removing missing values:
number of rows: 7589 
number of columns: 57
ws_list []
rh_list []
ta_list []
g_list ['G_1_1_1', 'G_1_1_2', 'G_1_1_3', 'G_1_1_4', 'G_PI_1_1_A']
h_list []
netrad_list []
After grouping Index(['TIMESTAMP_END', 'WS', 'U_SIGMA', 'V_SIGMA', 'W_SIGMA', 'WD', 'TA',
       'T_CANOPY', 'RH', 'PA', 'T_SONIC', 'T_SONIC_SIGMA', 'P_RAIN', 'CO2',
       'H2O', 'FC', 'NEE_PI', 'H', 'LE', 'USTAR', 'ZL', 'SW_IN', 'SW_OUT',
       'LW_IN', 'LW_OUT', 'SW_DIF', 'SW_DIR', 'PPFD_IN', 'PPFD_OUT', 'NETRAD',
       'NDVI', 'PRI', 'ALB', 'TS_1_1_1', 'TS_1_1_2', 'TS_PI_1_1_A', 'TS_1_2_1',
       'TS_1_2_2', 'TS_PI_1_2_A', 'SWC_1_1_1', 'SWC_1_1_2', 'SWC_PI_1_1_A',
       'SWC_1_2_1', 'SWC_1_2_2', 'SWC_PI_1_2_A', 'Site Id', 'Category', 'Year',
       'Month', 'Day', 'Date', 'Timestamp start', 'G'],
      dtype='object')
Site: US-A32
df shape: (290784, 79)
columns to dr

columns to drop []
Before removing missing values:
number of rows: 154847 
number of columns: 72
After removing missing values:
number of rows: 0 
number of columns: 72
ws_list []
rh_list []
ta_list ['TA_1_1_2', 'TA_1_1_3', 'TA_1_1_1']
g_list ['G_1_1_1', 'G_1_1_2', 'G_1_1_3']
h_list []
netrad_list []
After grouping Index(['TIMESTAMP_END', 'FC', 'LE', 'H', 'CO2', 'H2O', 'PA', 'T_SONIC',
       'T_SONIC_SIGMA', 'RH', 'P', 'PPFD_IN', 'SW_IN', 'SW_OUT', 'LW_IN',
       'LW_OUT', 'NETRAD', 'ALB', 'SWC_1_1_1', 'SWC_1_2_1', 'SWC_1_3_1',
       'SWC_1_4_1', 'SWC_1_5_1', 'SWC_2_1_2', 'SWC_2_2_2', 'SWC_2_3_2',
       'SWC_3_4_2', 'SWC_2_5_2', 'SWC_3_1_3', 'SWC_3_2_3', 'SWC_3_3_3',
       'SWC_3_4_3', 'SWC_3_5_3', 'SWC_4_1_1', 'SWC_5_1_1', 'SWC_6_1_1',
       'TS_1_1_1', 'TS_1_2_1', 'TS_1_3_1', 'TS_1_4_1', 'TS_1_5_1', 'TS_1_6_1',
       'D_SNOW', 'USTAR', 'U_SIGMA', 'V_SIGMA', 'W_SIGMA', 'WS', 'WD',
       'SWC_1_1_2', 'SWC_1_2_2', 'SWC_1_3_2', 'SWC_1_4_2', 'SWC_1_5_2',
       'SWC_1_1_3', 'SWC_1

In [38]:
class GenerateConfig:
    '''Writes one flux-data-qaqc ".ini" config file per exported site CSV.

    The file locations are class attributes so they can be overridden
    without editing the methods.
    '''

    # Directory (relative to the working directory) that receives the .ini files.
    CONFIG_DIR = "sites/config"
    # Directory written into "climate_file_path"; holds "<site id>_Hourly.csv".
    DATA_DIR = "/Users/saraawad/Desktop/flux-data-qaqc/sites/data/"
    # Directory written into "gridmet_file_path".
    GRIDMET_DIR = "/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/"

    # Columns every site file must contain before a config is generated.
    REQUIRED_COLUMNS = ("NETRAD", "H", "LE", "G", "RH", "WS", "TA")

    # (column base name, config key prefix, units) of the optional variables,
    # in the order they are written.
    _OPTIONAL_VARS = (
        ("SW_IN", "shortwave_in", "w/m2"),
        ("SW_OUT", "shortwave_out", "w/m2"),
        ("LW_IN", "longwave_in", "w/m2"),
        ("LW_OUT", "longwave_out", "w/m2"),
        ("VPD_PI", "vap_press_def", "hPa"),
    )

    def __init__(self, folder_path, output_name):
        '''Store the folder scanned for site CSVs; ``output_name`` is only stored.'''
        print("Initializer")
        self.folder_path = folder_path
        self.output_name = output_name

    def are_all_main_vars_exists(self, df):
        '''Return True when ``df`` has every column listed in ``REQUIRED_COLUMNS``.'''
        return all(item in df.columns for item in self.REQUIRED_COLUMNS)

    def group_input_variants(self, df, variant_list, mean_column):
        '''Store the row-wise mean of ``variant_list`` in ``mean_column`` and drop the variants.

        With a single variant its values are copied; with none, ``df`` is
        returned without a new column.
        '''
        if len(variant_list) > 1:
            df[mean_column] = df[variant_list].mean(axis=1)
        elif len(variant_list) > 0:
            df[mean_column] = df[variant_list[0]]

        return df.drop(variant_list, axis=1)

    @staticmethod
    def _pick_column(df, base):
        '''Return the column of ``df`` to reference for ``base``, or None.

        The exact ``base`` column wins; otherwise the first "<base>_..."
        variant is used, so the config never names a column that is missing
        from the data file.
        '''
        if base in df.columns:
            return base
        prefix = base + "_"
        for col in df.columns:
            if str(col).startswith(prefix):
                return col
        return None

    def generate_config(self, site_main_df, site_var_df):
        '''Write "<CONFIG_DIR>/<site id>.ini" for one site and return its path.

        ``site_main_df`` supplies the site id, latitude, longitude and
        elevation (first row); ``site_var_df`` is the site's data file and is
        only inspected for its column names -- it is not modified. Raises
        IndexError when ``site_main_df`` has no rows.
        '''
        site_id = site_main_df["Site Id"].iloc[0]
        latitude = site_main_df["Latitude"].iloc[0]
        longitude = site_main_df["Longitude"].iloc[0]
        elevation = site_main_df["Elevation(m)"].iloc[0]

        lines = [
            "[METADATA]",
            "climate_file_path = " + self.DATA_DIR + site_id + "_Hourly.csv",
            "station_latitude = " + str(latitude),
            "station_longitude = " + str(longitude),
            "station_elevation = " + str(elevation),
            "missing_data_value = -9999",
            "skiprows = 0",
            "date_parser = %Y%m%d%H%M",
            "site_id = " + site_id,
            "gridmet_file_path = " + self.GRIDMET_DIR + site_id + ".csv",
            "[DATA]",
            "datestring_col = Timestamp start",
            "net_radiation_col = NETRAD",
            "net_radiation_units = w/m2",
            "sensible_heat_flux_col = H",
            "sensible_heat_flux_units = w/m2",
            "latent_heat_flux_col = LE",
            "latent_heat_flux_units = w/m2",
            "ground_flux_col = G",
            "ground_flux_units = w/m2",
        ]

        # Optional variables are only listed when the data file has a column for them.
        for base, key, units in self._OPTIONAL_VARS:
            column = self._pick_column(site_var_df, base)
            if column is not None:
                lines.append(key + "_col = " + str(column))
                lines.append(key + "_units = " + units)

        # NOTE(review): T_SONIC is written even when the data file lacks it,
        # exactly as before -- confirm whether it should be conditional too.
        lines.append("avg_temp_col = T_SONIC")
        lines.append("avg_temp_units = C")

        ta_column = self._pick_column(site_var_df, "TA")
        if ta_column is not None:
            lines.append("temp_col = " + str(ta_column))
            lines.append("temp_units = C")

        lines.extend([
            "rel_humidity_col = RH",
            "rel_humidity_units = (%): Relative humidity, range 0-100",
            "wind_spd_col = WS",
            "wind_spd_units = m/s",
        ])

        os.makedirs(self.CONFIG_DIR, exist_ok=True)
        site_path = os.path.join(self.CONFIG_DIR, site_id + ".ini")
        # The content is built first and written in one go, so a failure above
        # never leaves a half-written config; "with" closes the file on error.
        with open(site_path, "w") as f:
            f.write("\n".join(lines) + "\n")
        return site_path

    def generate_site_data(self):
        '''Generate a config for every CSV under ``self.folder_path`` that has the required columns.

        The site id is the first underscore-separated token of the file name.
        Sites without a row in the sites sheet are reported and skipped.
        '''
        files = Helpers.get_files_directory(self.folder_path)
        sites_df = Helpers.read_sites_data()
        for file_path in files:
            file_name = os.path.basename(file_path)
            # Get only the sheets having the variables
            if not file_name.endswith(".csv"):
                continue
            df_filt = pd.read_csv(file_path, delimiter=',')
            site_id = file_name.split("_")[0]
            df_filt["Site Id"] = site_id
            print("Site:", site_id)
            site_df = sites_df[sites_df["Site Id"] == site_id]
            if site_df.empty:
                print("No metadata found for site", site_id, "- skipping")
                continue
            if self.are_all_main_vars_exists(df_filt):
                self.generate_config(site_df, df_filt)

In [39]:
if __name__ == "__main__":
    # Folder holding the exported per-site CSV files that get a config each.
    input_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "data/")
    # Stored on the generator as output_name.
    output_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "output/")

    gc = GenerateConfig(folder_path=input_name, output_name=output_name)
    gc.generate_site_data()

Initializer
Site: US-Ced
Site: US-Shd
Site: US-Myb
Site: US-Bi2
Site: US-A32
Site: US-Tw2
Site: US-Pon
Site: US-Skr
Site: US-Snd
Site: US-AR2
Site: US-Goo
Site: US-Wlr
Site: US-A74
Site: US-AR1
Site: US-Kon
Site: US-Wgr
Site: US-SO2
Site: US-SP2
Site: US-Var
Site: US-Twt


In [10]:
class FluxQaQcCorrection:
    '''Runs a flux-data-qaqc correction for every ".ini" config under ``folder_path``.

    ``correction_method`` 1 selects the 'ebr' method; any other value selects
    the 'br' method.
    '''

    def __init__(self, folder_path, output_name, correction_method=1):
        '''Store the config folder and the method selector; ``output_name`` is only stored.'''
        print("Initializer")
        self.folder_path = folder_path
        self.output_name = output_name
        self.correction_method = correction_method

    def load_config(self, site_id, path):
        '''Build, print and return the ``Data`` object for the config at ``path``.

        ``site_id`` is accepted for interface compatibility but is not used.
        '''
        d = Data(path)
        print(d)
        return d

    def _correct_data(self, d, meth):
        '''Run ``QaQc.correct_data`` with ``meth`` and gap filling, write the output, return ``q.df``.'''
        q = QaQc(d, drop_gaps=True)
        q.correct_data(meth=meth, etr_gap_fill=True)
        # Read before writing, in the same order as the original two methods.
        corrected = q.df
        q.write()
        return corrected

    def ebr_correct_data(self, d):
        '''Apply the 'ebr' correction to ``d`` and return the corrected frame.'''
        return self._correct_data(d, 'ebr')

    def bowen_correct_data(self, d):
        '''Apply the 'br' correction to ``d`` and return the corrected frame.'''
        return self._correct_data(d, 'br')

    def generate_correction(self):
        '''Correct every site that has a ".ini" config under ``self.folder_path``.

        A failure for one site is printed and the loop continues with the
        next site (best effort).
        '''
        for file_path in Helpers.get_files_directory(self.folder_path):
            file_name = os.path.basename(file_path)
            # The config folder also holds other files (e.g. the gridMET CSV
            # downloads in a sub-folder); only .ini files are configs.
            if not file_name.endswith(".ini"):
                continue
            site_id = file_name.split(".")[0]
            print("site id:", site_id)
            try:
                d = self.load_config(site_id, file_path)
                if self.correction_method == 1:
                    self.ebr_correct_data(d)
                else:
                    self.bowen_correct_data(d)
            except Exception as ex:
                print("Exception:", str(ex))

<h3> EBR Correction </h3>

In [11]:
if __name__ == "__main__":
    # 1 makes generate_correction call ebr_correct_data for every site.
    correction_method = 1
    # Folder scanned for the per-site config files.
    input_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "config/")
    output_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "data/")

    gc = FluxQaQcCorrection(
        folder_path=input_name,
        output_name=output_name,
        correction_method=correction_method,
    )
    gc.generate_correction()

Initializer
site id: US-Tw2
<fluxdataqaqc.data.Data object at 0x124b60588>
T_SONIC
They will be filled with NaN values
Converting vpd from hpa to kpa

The input data temporal frequency was not detected.

Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-Tw2_38.1083N_-121.6417W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-SP2
<fluxdataqaqc.data.Data object at 0x1245834e0>
T_SONIC
They will be filled with NaN values
Converting vpd from hpa to kpa

The input data temporal frequency was not detected.

Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
gridMET reference ET already downloaded fo

Filtering days with less then 100.0% or 48/48 sub-daily measurements
gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-AR2_36.6500N_-99.6000W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-AR1
<fluxdataqaqc.data.Data object at 0x123b5a518>
VPD_PI T_SONIC
They will be filled with NaN values

The input data temporal frequency was not detected.
Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
Filtering days with less then 100.0% or 46/46 sub-daily measurements
gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-AR1_36.4417N_-99.4333W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-SO2
<fluxdataqaqc.data.Data object at 0x123b3dcc0>
T_SONIC
They w

site id: US-Skr
<fluxdataqaqc.data.Data object at 0x124b7b2e8>
T_SONIC
They will be filled with NaN values
Temporal frequency of data > hourly cannot calculate VP/VPD

The input data temporal frequency was not detected.

Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
Converting vpd from hpa to kpa
 /Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-Skr.csv
Downloading gridMET var: daily_mean_reference_evapotranspiration_alfalfa

Downloading gridMET var: precipitation_amount

Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-Bi2
<fluxdataqaqc.data.Data object at 0x123603b38>
Converting vpd from hpa to kpa
Calculating vapor pressure from vapor pressure deficit and air temperature

The input data temporal frequency was not detected.
Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance compon

<h3> Bowen Ratio </h3>

In [12]:
if __name__ == "__main__":
    # Any value other than 1 makes generate_correction call bowen_correct_data.
    correction_method = 2
    # Folder scanned for the per-site config files.
    input_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "config/")
    output_name = os.path.join("/Users/saraawad/Desktop/flux-data-qaqc/sites/", "data/")

    gc = FluxQaQcCorrection(
        folder_path=input_name,
        output_name=output_name,
        correction_method=correction_method,
    )
    gc.generate_correction()

Initializer
site id: US-Tw2
<fluxdataqaqc.data.Data object at 0x123b42b38>
T_SONIC
They will be filled with NaN values
Converting vpd from hpa to kpa

The input data temporal frequency was not detected.

Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-Tw2_38.1083N_-121.6417W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-SP2
<fluxdataqaqc.data.Data object at 0x12398acc0>
T_SONIC
They will be filled with NaN values
Converting vpd from hpa to kpa

The input data temporal frequency was not detected.

Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
gridMET reference ET already downloaded fo

gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-Goo_34.2750N_-89.8917W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-Shd
<fluxdataqaqc.data.Data object at 0x124aeaef0>
T_SONIC
They will be filled with NaN values
Temporal frequency of data > hourly cannot calculate VP/VPD

The input data temporal frequency was not detected.

Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
Converting vpd from hpa to kpa
gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-Shd_36.9417N_-96.6833W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-AR2
<fluxdataqaqc.data.Data object at 0x1235992b0>
VPD_PI T_SONIC
They will be filled with NaN values

The input data 

Temporal frequency of data > hourly cannot calculate VP/VPD

The input data temporal frequency was not detected.
Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
Filtering days with less then 100.0% or 17/17 sub-daily measurements
Converting vpd from hpa to kpa
gridMET reference ET already downloaded for station at:
/Users/saraawad/Desktop/flux-data-qaqc/sites/config/gridMET_data/US-Snd_38.0250N_-121.7667W.csv
not redownloading.
Gap filling ET_corr with filtered ETrF x ETr (gridMET)
site id: US-Ced
<fluxdataqaqc.data.Data object at 0x124aea588>
T_SONIC
They will be filled with NaN values
Converting vpd from hpa to kpa

The input data temporal frequency was not detected.
Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
Filtering days with less then 100.

In [122]:
# Display the site metadata sheet (filtered_sites_all.xlsx under base_path)
# as the cell's rich output.
Helpers.read_sites_data()

Unnamed: 0,Site Id,Latitude,Longitude,Elevation(m),Climate Koeppen,Mean Annual Temp (Â°C),Mean Annual Precip. (mm):,Flux Species Measured:,Years Data Collected:,Description
0,US-ARM,36.6058,-97.4888,314.0,Cfa (Humid Subtropical: mild with no dry seaso...,14.76,843.0,"CO2, H2O",2002 - 2019,Central facility tower crop field (winter whea...
1,US-AR2,36.6358,-99.5975,646.0,Dsa (Dry Continental: hot summer),,,"CO2, H, H2O",2009 - 2012,The ARM USDA UNL OSU Woodward Switchgrass 2 to...
2,US-KFS,39.0561,-95.1907,310.0,Cfa (Humid Subtropical: mild with no dry seaso...,12.0,1014.0,"CO2, H2O",2007 - 2019,The study is an abandoned grassland at the Kan...
3,US-Wgr,45.112865,-122.656026,52.0,"Csb (Mediterranean: mild with dry, warm summer)",11.58,1194.0,"CO2, H, H2O",2014 - 2019,he site was established in summer 2014 and is ...
4,US-Kon,39.0824,-96.5603,417.0,Cfa (Humid Subtropical: mild with no dry seaso...,12.77,867.0,"CO2, H2O",2006 - 2019,Burned on an annual basis. Bison reintroduced ...
5,US-Bi2,38.109,-121.535,-4.98,"Csa (Mediterranean: mild with dry, hot summer)",16.0,338.0,"CO2, CH4, H2O",2017 - 2019,corn is growing on an island in the Sacramento...
6,US-Me2,44.4523,-121.5574,1253.0,"Csb (Mediterranean: mild with dry, warm summer)",6.28,523.0,"CO2, H2O",2002 - 2019,The mean stand age is 71 years old and the sta...
7,US-A74,36.808464,-97.548854,337.0,Cfa (Humid Subtropical: mild with no dry seaso...,33.9,889.0,"CO2, H, H2O",2016 - 2019,This site is located near the ARM SGP Central ...
8,US-HRC,34.585722,-91.747528,,Cfa (Humid Subtropical: mild with no dry seaso...,,,"CO2, CH4, H2O",2017 - 2017,Conventional flood irrigation method on a rice...
9,US-SO2,33.3738,-116.6228,1394.0,"Csa (Mediterranean: mild with dry, hot summer)",13.63,553.0,"CO2, H2O",1997 - 2019,The Sky Oaks Old site is located near the Sky ...
