# Start

This notebook downloads hourly temperature (`tas`) and relative humidity (`hurs`) from BARRA2, derives Vapor Pressure Deficit (VPD) from them, and matches the result with the VPD from field observations for subsequent analysis.

In [None]:
# --- Notebook configuration ---

# Switches for the optional stages further down.
explore_barra2_data = False
download_barra2_data = True  # around 30 min in Colab

# Path to this repository's root directory.
working_dir = "../.."

# CSV file names; both are resolved under <working_dir>/output/csv.
input_file_name = "in-situ_topography_pcs.csv"
output_file_name = "barra2_vpd_pcs.csv"

In [None]:
import os
import sys

# Imported explicitly: `pd` is used throughout this notebook and was
# previously only in scope implicitly (presumably via the star import below).
import pandas as pd
from tqdm import tqdm

# Registers `progress_apply` on pandas objects (used for the slow per-row lookups).
tqdm.pandas()

# Make the repository root importable so the local `Utils` package resolves.
sys.path.append(working_dir)
# NOTE(review): the star import supplies the BARRA2 helpers called below
# (get_barra2_grid_point, find_nearest_barra2_grid_point,
# download_all_barra2_data, get_barra2_value). It is kept as-is because the
# full set of names relied upon cannot be confirmed from this notebook alone;
# prefer explicit imports once verified.
from Utils.barra2 import *
from Utils.datetime import add_UTC_Datetime
from Utils.vpd import calculate_vpd

# Directory passed to the BARRA2 download and lookup helpers.
barra2_data_dir = os.path.join(working_dir, 'Data', 'barra2')

# Loading in-situ and remote data


In [None]:
# Load the in-situ CSV (`input_file_name`) as the main df

df = pd.read_csv(os.path.join(working_dir, "output", "csv", input_file_name))

# Keep only rows that have an observed VPD. `dropna` returns a new frame, so
# the column assignments below do not trigger pandas' SettingWithCopyWarning
# (which filtering with a boolean mask and then assigning can do).
df = df.dropna(subset=['VPD'])

df['Datetime'] = pd.to_datetime(df['Datetime'])
if 'UTC_Datetime' in df.columns:
    df['UTC_Datetime'] = pd.to_datetime(df['UTC_Datetime'])
else:
    # No UTC column in the input: let the project helper add it.
    df = add_UTC_Datetime(df)

# Date range handed to the BARRA2 download, padded by one day at the end.
# Series.min()/max() skip NaT; the builtin min()/max() compare element by
# element in Python and give unreliable results when a NaT is present.
# NOTE(review): the range is taken from the local 'Datetime' column, not
# 'UTC_Datetime'. If the BARRA2 lookup is keyed on UTC, confirm that the
# start of the range also covers the earliest UTC timestamp.
first_datetime = df['Datetime'].min()
last_datetime = df['Datetime'].max() + pd.Timedelta(days=1)
print(
    "First date: ",
    first_datetime.strftime("%Y%m%d"),
    ", last date: ",
    last_datetime.strftime("%Y%m%d"),
)

df.head()

## Downloading BARRA2 data

In [None]:
# Attach to every site the coordinates of its nearest barra2 grid point

barra2_lats, barra2_lons = get_barra2_grid_point(working_dir)


def _nearest_barra2_cell(site_row):
    """Return the nearest BARRA2 grid point for one row's X/Y as a Series."""
    nearest = find_nearest_barra2_grid_point(
        site_row['X'], site_row['Y'], barra2_lons, barra2_lats
    )
    return pd.Series(nearest)


df[['barra2_X', 'barra2_Y']] = df.apply(_nearest_barra2_cell, axis=1)

In [None]:
# Download all barra2 data

# Unique barra2 cells (one (barra2_X, barra2_Y) tuple per distinct cell) whose data is needed
barra2_cell_locations_list = list({(x, y) for x, y in df[['barra2_X', 'barra2_Y']].values})
print("barra2_cell_locations_list length: ", len(barra2_cell_locations_list))
print("barra2_cell_locations_list: ", [(str(x), str(y)) for x, y in barra2_cell_locations_list])

if download_barra2_data:
    # BARRA2 variable names to fetch; named so the builtin `vars` is not shadowed.
    barra2_variables = ['tas', 'hurs']
    download_all_barra2_data(
        barra2_variables,
        barra2_cell_locations_list,
        first_datetime,
        last_datetime,
        barra2_data_dir,
    )

In [None]:
# # BARRA2 data exploration

# if explore_barra2_data:
#     barra2_data_dir = os.path.join(working_dir, "Data", "barra2")
#     barra2_df = pd.read_csv(os.path.join(barra2_data_dir, os.listdir(barra2_data_dir)[0]))
#     barra2_df.info()

In [None]:
# # BARRA2 netCDF data exploration

# import xarray as xr

# if explore_barra2_data:
#     tas_url = "https://thredds.nci.org.au/thredds/fileServer/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr/tas/latest/tas_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_201812-201812.nc"
#     hurs_url = "https://thredds.nci.org.au/thredds/fileServer/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr/hurs/latest/hurs_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_201812-201812.nc"
#     !curl -L {tas_url} -o "barra2_tas.nc"
#     !curl -L {hurs_url} -o "barra2_hurs.nc"

#     tas_ds = xr.open_dataset("barra2_tas.nc")
#     hurs_ds = xr.open_dataset("barra2_hurs.nc")
#     tas_ds

In [None]:
# if explore_barra2_data:
#     hurs_ds

# Combining in-situ and remote data into a single dataframe

In [None]:
# Per-row lookup of the BARRA2 values; the data csv files are opened one-by-one (40 min)

# Output column -> BARRA2 variable name, filled in this order.
_barra2_columns = (
    ('barra2_Temperature', 'tas'),
    ('barra2_RH', 'hurs'),
)

for _column_name, _barra2_var in _barra2_columns:
    # `var=` binds the current variable name so the lambda does not see a later one.
    df[_column_name] = df.progress_apply(
        lambda site_row, var=_barra2_var: get_barra2_value(site_row, var, barra2_data_dir),
        axis=1,
    )

df.head()

In [None]:
# Show every row that has at least one missing value.
_has_missing_value = df.isna().any(axis=1)
df[_has_missing_value]

# Calculating remote VPD from remote temperature and remote relative humidity

In [None]:
def _barra2_row_vpd(record):
    """Compute VPD for one row from its BARRA2 temperature and relative humidity."""
    return calculate_vpd(record['barra2_Temperature'], record['barra2_RH'])


df['barra2_VPD'] = df.apply(_barra2_row_vpd, axis=1)
df.head()

# Save the resulting dataframe

In [None]:
# Write the combined table (without the index column) under <working_dir>/output/csv.
output_csv_path = os.path.join(working_dir, "output", "csv", output_file_name)
df.to_csv(output_csv_path, index=False)