# Start

This notebook loads daily Soil Dryness Index (SDI) data from ERATOS and matches it to the 9 am field observation of each site and day, then saves the combined table for further analysis.

In [None]:
# Notebook configuration: every path below is joined onto working_dir.
working_dir = '../..'  # This repository's root directory
input_file_name = 'in-situ_topography_fc.csv'  # read from <working_dir>/output/csv
output_file_name = 'eratos_sdi_fc_9am.csv'  # written to <working_dir>/output/csv

eratos_sdi_path = 'Data/eratos/ANU_CombinedSites_SDI.csv'  # daily SDI table (CSV), relative to working_dir

In [None]:
import sys

sys.path.append(working_dir)
import os

import numpy as np
from tqdm import tqdm

tqdm.pandas()
import pandas as pd

from Utils.daily_minmax import get_daily_extreme_observation
from Utils.datetime import add_UTC_Datetime
from Utils.vpd import calculate_vpd

# Loading in-situ data


In [None]:
# Read the in-situ observations (input_file_name) as the main dataframe `df`
in_situ_csv = os.path.join(working_dir, "output", "csv", input_file_name)
df = pd.read_csv(in_situ_csv)
# df = df[~df['Soil_mois'].isna()]

# Timestamps arrive as text from the CSV; parse them into datetimes
df['Datetime'] = pd.to_datetime(df['Datetime'])
if 'UTC_Datetime' not in df.columns:
    # No UTC column in the file: let the project helper add it
    df = add_UTC_Datetime(df)
else:
    df['UTC_Datetime'] = pd.to_datetime(df['UTC_Datetime'])
df.head()

# Loading ERATOS data

In [None]:
# Read the ERATOS SDI table into memory.
# NOTE: despite its name, eratos_sdi_data_dir holds the path of a single CSV
# file (working_dir joined with eratos_sdi_path), not a directory.
eratos_sdi_data_dir = os.path.join(working_dir, eratos_sdi_path)
eratos_sdi_df = pd.read_csv(eratos_sdi_data_dir)
eratos_sdi_df.head()

# Combining in-situ and remote data into a single dataframe

In [None]:
# Keep only the observations whose 'Datetime' is at 09:00 (hour 9, minute 0).
is_9am = (df['Datetime'].dt.hour == 9) & (df['Datetime'].dt.minute == 0)
# .copy() makes df_9am an independent dataframe, so columns can be added to it
# later without pandas raising SettingWithCopyWarning.
df_9am = df[is_9am].copy()
df_9am.head()

In [None]:
# Fill in df_9am with eratos data


# Look up the ERATOS value for one observation row in the already-loaded table
def get_eratos_value(row, eratos_df):
    """Return the ERATOS value matching one observation's site and day.

    Args:
        row: A record exposing ``'SiteID'`` and ``'UTC_Datetime'`` (for
            example one row handed over by ``DataFrame.apply(axis=1)``).
        eratos_df: Dataframe with a ``'Date'`` column that is compared
            against ``'%Y-%m-%d'`` strings, and one column per site named by
            the digits of the site ID.

    Returns:
        The first matching cell, or ``np.nan`` when the timestamp is missing,
        the site has no column, or no row exists for the date.
    """
    # Site columns are addressed by the digits of the ID only
    site_column = ''.join(c for c in str(row['SiteID']) if c.isdigit())

    timestamp = row['UTC_Datetime']
    if pd.isna(timestamp):
        # NaT cannot be formatted with strftime; treat it as "no match"
        return np.nan

    # Round to the NEAREST day (not floor): times from 12:00 onwards map to
    # the following date.
    # NOTE(review): presumably this maps late-evening UTC times onto the
    # local calendar day - confirm against the ERATOS date convention.
    target_time = timestamp.round('1D').strftime('%Y-%m-%d')
    try:
        return eratos_df.loc[eratos_df['Date'] == target_time, site_column].values[0]
    except (IndexError, KeyError):
        # KeyError: unknown site column; IndexError: no row for that date
        return np.nan


def _lookup_sdi(record):
    """Fetch the SDI value for one df_9am row from the loaded ERATOS table."""
    return get_eratos_value(record, eratos_sdi_df)


# Row-wise lookup with a tqdm progress bar; unmatched rows come back as NaN
df_9am['eratos_SDI'] = df_9am.progress_apply(_lookup_sdi, axis=1)
df_9am.head()

In [None]:
# Inspect the rows for which no ERATOS SDI value could be matched
missing_sdi = df_9am['eratos_SDI'].isna()
df_9am[missing_sdi]

# Save the resulting dataframes

In [None]:
# Discard observations without a matched SDI value, then write the result to
# <working_dir>/output/csv/<output_file_name> without the index column.
df_9am = df_9am.dropna(subset=['eratos_SDI'])
output_csv = os.path.join(working_dir, "output", "csv", output_file_name)
df_9am.to_csv(output_csv, index=False)