In [None]:
# Notebook configuration: every path below is resolved relative to `working_dir`.
working_dir = '../..'  # This repository's root directory
input_file_name = 'in-situ_topography_pcs.csv'  # read from <working_dir>/output/csv
output_file_name = 'afdrs_dfmc_pcs.csv'  # written to <working_dir>/output/csv

afdrs_path = 'Data/afdrs/ACT_Extractions_t_rh.csv'  # AFDRS extraction CSV, joined onto working_dir

In [None]:
import sys

sys.path.append(working_dir)
import os

from tqdm import tqdm

tqdm.pandas()
import numpy as np
import pandas as pd

from Utils.datetime import add_UTC_Datetime
from Utils.vpd import calculate_vpd

# Loading in-situ data


In [None]:
# Read the in-situ table (`input_file_name` under output/csv) as the main df,
# discarding rows that have no DFMC value.
in_situ_csv = os.path.join(working_dir, "output", "csv", input_file_name)
df = pd.read_csv(in_situ_csv).dropna(subset=['DFMC'])

# Parse the timestamps; UTC_Datetime is derived only when the file lacks it.
df['Datetime'] = pd.to_datetime(df['Datetime'])
if 'UTC_Datetime' not in df.columns:
    df = add_UTC_Datetime(df)
else:
    df['UTC_Datetime'] = pd.to_datetime(df['UTC_Datetime'])
df.head()

# Loading AFDRS data

In [None]:
# Read the AFDRS extraction CSV. Despite its name, `afdrs_data_dir` holds the
# full path of the CSV file (working_dir joined with `afdrs_path`), not a directory.
afdrs_data_dir = os.path.join(working_dir, afdrs_path)
afdrs_df = pd.read_csv(afdrs_data_dir)
afdrs_df.head()  # preview the first rows

In [None]:
# Load the site table from all_sites_latlon_dates.csv. Its SiteID, Stn_lat and
# Stn_long columns are what the next cell uses to attach a SiteID to the AFDRS data.
site_df = pd.read_csv(os.path.join(working_dir, 'Data', 'all_sites_latlon_dates.csv'))
site_df.head()  # preview only, like the other cells, instead of dumping the whole table

In [None]:
# Attach a SiteID to every AFDRS record by matching its (latitude, longitude)
# to the station coordinates in the site table.
# Repeated (SiteID, Stn_lat, Stn_long) rows in the site table are collapsed
# first; otherwise each repeat would duplicate the matching AFDRS records.
site_coords = site_df[['SiteID', 'Stn_lat', 'Stn_long']].drop_duplicates()

# Drop columns left over from a previous run of this cell so it can be re-run
# without producing SiteID_x / SiteID_y, which would break the later merge on SiteID.
afdrs_df = afdrs_df.drop(
    columns=['SiteID', 'Stn_lat', 'Stn_long'], errors='ignore'
).merge(
    # Default inner join: AFDRS records with no matching station are dropped.
    # NOTE(review): the match relies on exact equality of the coordinate values
    # in both files — confirm they are stored identically.
    site_coords,
    left_on=['latitude', 'longitude'],
    right_on=['Stn_lat', 'Stn_long'],
)
afdrs_df.head()

# Combining in-situ and remote data into a single dataframe

In [None]:
# Build the join key: each observation's UTC timestamp rounded to the nearest
# hour and formatted as a string.
# NOTE(review): assumes the AFDRS `time` column holds strings in exactly this
# '%Y-%m-%dT%H:%M:%SZ' form — confirm against the AFDRS CSV.
df['rounded_time'] = df['UTC_Datetime'].dt.round('1h').dt.strftime('%Y-%m-%dT%H:%M:%SZ')

# Keep a single AFDRS record per (SiteID, time) — the first one — so the merge
# below can never multiply in-situ rows when the AFDRS table has repeated keys.
afdrs_lookup = afdrs_df[['SiteID', 'time', 'T_SFC', 'RH_SFC']].drop_duplicates(
    subset=['SiteID', 'time'], keep='first'
)

# Merge on both SiteID and time. The inner join drops observations that have no
# AFDRS record for their site and hour. The AFDRS columns are renamed in the
# same chain rather than in place.
df = df.merge(
    afdrs_lookup,
    left_on=['SiteID', 'rounded_time'],
    right_on=['SiteID', 'time'],
    how='inner',
).rename(columns={'T_SFC': 'afdrs_Temperature', 'RH_SFC': 'afdrs_RH'})
df.head()

In [None]:
# Legacy row-by-row AFDRS lookup, kept commented out for reference only: it took too long to run, and the merge in the previous cell now does this job.


# # For each row, open afdrs data csv file one-by-one to get data
# def get_afdrs_value(row, afdrs_df):
#     SiteID_str = str(row['SiteID'])

#     target_time = row['UTC_Datetime'].round('1h').strftime('%Y-%m-%dT%H:%M:%SZ')
#     filtered = afdrs_df[(afdrs_df['time'] == target_time) & (afdrs_df['SiteID'] == SiteID_str)]

#     if not filtered.empty:
#         afdrs_temp = filtered['T_SFC'].iloc[0]
#         afdrs_rh = filtered['RH_SFC'].iloc[0]
#         return pd.Series([afdrs_temp, afdrs_rh])
#     else:
#         return pd.Series([np.nan, np.nan])


# df[['afdrs_Temperature', 'afdrs_RH']] = df.progress_apply(lambda row: get_afdrs_value(row, afdrs_df), axis=1)
# df.head()

In [None]:
# Inspect the rows that still contain a missing value in any column.
# Observed result: NaNs occur only in the veg_cover column.
rows_with_nan = df.isna().any(axis='columns')
df[rows_with_nan]

In [None]:
# Keep only rows that have both an AFDRS temperature and an AFDRS relative
# humidity, then list any rows that still contain a missing value elsewhere.
df = df.dropna(subset=['afdrs_Temperature', 'afdrs_RH'])
df.loc[df.isna().any(axis='columns')]

# Calculating remote DFMC from remote temperature and remote relative humidity

In [None]:
def calculate_dfmc(temp, rh):
    """Placeholder for the DFMC model driven by temperature and relative humidity.

    Args:
        temp: The `afdrs_Temperature` value of one observation.
        rh: The `afdrs_RH` value of one observation.

    Returns:
        Always ``np.nan`` for now. The model is not implemented yet, so the
        `afdrs_DFMC` column built below holds no usable values.
    """
    # TODO (HIGH): implement the DFMC calculation from `temp` and `rh`.
    return np.nan


# Walk the two input columns directly instead of `df.apply(..., axis=1)`, which
# builds a full row Series for every record just to read two fields.
df['afdrs_DFMC'] = [
    calculate_dfmc(temp, rh)
    for temp, rh in zip(df['afdrs_Temperature'], df['afdrs_RH'])
]
df.head()

# Save the resulting dataframe

In [None]:
# Write the combined table to <working_dir>/output/csv/<output_file_name>, without the index column.
output_csv = os.path.join(working_dir, "output", "csv", output_file_name)
df.to_csv(output_csv, index=False)