# Start


This script prepares hourly Vapor Pressure Deficit (VPD) data from ERATOS and matches it with VPD from field observations for subsequent analysis.


In [None]:
# Relative path to the repository root. It is appended to sys.path and used
# as the base for every file path built in this notebook.
working_dir = '../..'  # This repository's root directory
# In-situ table read from <working_dir>/output/csv/.
input_file_name = 'in-situ_topography_fc.csv'
# Result table written to <working_dir>/output/csv/.
output_file_name = 'eratos_vpd_fc.csv'

# ERATOS CSV exports, given relative to working_dir.
# The RH and Temp files are loaded with pandas.read_csv further down.
eratos_rh_path = 'Data/eratos/ANU_CombinedSites_RH.csv'
eratos_temp_path = 'Data/eratos/ANU_CombinedSites_Temp.csv'
# NOTE(review): the SDI path is not referenced in the visible part of this
# notebook - confirm whether it is still needed.
eratos_sdi_path = 'Data/eratos/ANU_CombinedSites_SDI.csv'  # daily

In [None]:
import sys

# Extend the module search path with working_dir before the `Utils` imports
# below run (assumes the `Utils` package lives at the repository root).
sys.path.append(working_dir)
import os

from tqdm import tqdm

# Register tqdm's pandas integration; this provides the `progress_apply`
# method that is called on the main dataframe later in the notebook.
tqdm.pandas()
import numpy as np
import pandas as pd

from Utils.datetime import add_UTC_Datetime
from Utils.vpd import calculate_vpd

# Loading in-situ data


In [None]:
# Read the in-situ table (input_file_name) from output/csv; it becomes the main df.
# No filtering on the in-situ 'VPD' column is applied at this stage.
in_situ_csv_path = os.path.join(working_dir, "output", "csv", input_file_name)
df = pd.read_csv(in_situ_csv_path)

# CSV round-trips timestamps as text, so parse them back into datetimes.
df['Datetime'] = pd.to_datetime(df['Datetime'])
if 'UTC_Datetime' not in df.columns:
    # The file has no UTC column yet: let the project helper add it.
    df = add_UTC_Datetime(df)
else:
    df['UTC_Datetime'] = pd.to_datetime(df['UTC_Datetime'])
df.head()

# Loading ERATOS data

In [None]:
# Despite the `_dir` suffix this is the full path of the RH CSV file.
eratos_rh_data_dir = os.path.join(working_dir, eratos_rh_path)
# Load the ERATOS relative-humidity table; it is queried per row later via
# its 'Date_time' column and one column per site.
eratos_rh_df = pd.read_csv(eratos_rh_data_dir)
eratos_rh_df.head()

In [None]:
# Despite the `_dir` suffix this is the full path of the temperature CSV file.
eratos_temp_data_dir = os.path.join(working_dir, eratos_temp_path)
# Load the ERATOS temperature table; it is queried per row later via its
# 'Date_time' column and one column per site.
eratos_temp_df = pd.read_csv(eratos_temp_data_dir)
eratos_temp_df.head()

# Combining in-situ and remote data into a single dataframe

In [None]:
# Fill in df with eratos data


# For each row, look up the matching value in an already-loaded ERATOS dataframe
def get_eratos_value(row, eratos_df):
    """Return the ERATOS value matching one in-situ row, or NaN if there is none.

    The site column is selected by the digits contained in ``row['SiteID']``
    and the record by ``row['UTC_Datetime']`` rounded to the nearest hour.

    Args:
        row: Mapping-like record (e.g. a row from ``DataFrame.apply(axis=1)``)
            providing ``'SiteID'`` and a timestamp under ``'UTC_Datetime'``.
        eratos_df: DataFrame with a ``'Date_time'`` column holding strings
            formatted as ``'%Y-%m-%d %H:%M:%S'`` and one column per site,
            named by the site's digits.

    Returns:
        The first matching value, or ``np.nan`` when the timestamp is missing,
        the site column does not exist, or no record matches the hour.
    """
    site_id = row['SiteID']
    # A row-wise apply can upcast integer IDs to float; str(12.0) would then
    # be reduced to '120' by the digit filter and select the wrong column.
    if isinstance(site_id, float) and site_id.is_integer():
        site_id = int(site_id)
    site_column = ''.join(c for c in str(site_id) if c.isdigit())

    timestamp = row['UTC_Datetime']
    # NaT cannot be formatted with strftime (it raises ValueError), so treat a
    # missing timestamp like any other "no match".
    if pd.isna(timestamp):
        return np.nan
    target_time = timestamp.round('1h').strftime('%Y-%m-%d %H:%M:%S')

    try:
        matches = eratos_df.loc[eratos_df['Date_time'] == target_time, site_column]
    except KeyError:
        # Unknown site column (or no 'Date_time' column) in this table.
        return np.nan
    if matches.empty:
        return np.nan
    return matches.values[0]


# Attach the ERATOS temperature and relative humidity to every in-situ row,
# showing a tqdm progress bar for each pass over the dataframe.
eratos_sources = (
    ('eratos_Temperature', eratos_temp_df),
    ('eratos_RH', eratos_rh_df),
)
for column_name, source_df in eratos_sources:
    # Bind source_df as a default so each lambda keeps its own table.
    df[column_name] = df.progress_apply(
        lambda row, source_df=source_df: get_eratos_value(row, source_df), axis=1
    )
df.head()

In [None]:
# Show every row that still has at least one missing value.
# Author's observation: the only NaNs are in the veg_cover column.
rows_with_nan = df.isna().any(axis=1)
df[rows_with_nan]

# Calculating remote VPD from remote temperature and remote relative humidity

In [None]:
def _row_to_vpd(row):
    """Compute VPD for one row from its ERATOS temperature and relative humidity."""
    return calculate_vpd(row['eratos_Temperature'], row['eratos_RH'])


# Derive the remote VPD row by row with the project's calculate_vpd helper.
df['eratos_VPD'] = df.apply(_row_to_vpd, axis=1)
df.head()

# Save the resulting dataframes

In [None]:
# Keep only the rows for which a remote VPD could be computed, then write
# the table to output/csv without the index column.
output_csv_path = os.path.join(working_dir, "output", "csv", output_file_name)
df = df[df['eratos_VPD'].notna()]
df.to_csv(output_csv_path, index=False)