# Transformed Hourly Weather Data
Author: Colin Pannikkat

This notebook transforms the Posch et al. hourly weather data into an input file for the GARISOM model. Soil temperature was not provided; it is instead retrieved from NLDAS in file_builder.py when building the simulation files.

In [None]:
import pandas as pd
from datetime import datetime
import numpy as np

In [None]:
# Load the cottonwood weather CSV (path is relative to the working directory)
# and preview the first rows.
hourly_weather = pd.read_csv("230926_DBG_cottonwood_weather_clean.csv")
hourly_weather.head()

In [None]:
# Keep only the columns used downstream. The explicit .copy() yields an
# independent frame, so assigning to its TIMESTAMP column afterwards does not
# raise pandas' SettingWithCopyWarning.
hourly_weather = hourly_weather[['TIMESTAMP', 'WindSpeed_Avg', 'AirT_Avg', 'AirRH_Avg']].copy()

In [None]:
def _roll_over_24h(stamp):
    '''
    Rewrite a "YYYY-MM-DD 24:00:00" string as midnight of the following day.
    Any other string is returned unchanged.
    '''
    if stamp[11:19] != "24:00:00":
        return stamp
    next_day = pd.to_datetime(stamp[:10]) + pd.Timedelta(days=1)
    return next_day.strftime("%Y-%m-%d 00:00:00")


# Hour "24" is not accepted by %H, so those stamps are rolled over to the next
# day before the whole column is parsed.
hourly_weather['TIMESTAMP'] = hourly_weather['TIMESTAMP'].apply(_roll_over_24h)
hourly_weather['TIMESTAMP'] = pd.to_datetime(hourly_weather['TIMESTAMP'], format="%Y-%m-%d %H:%M:%S")

In [None]:
# Keep only the readings stamped exactly on the hour (minute == 0).
weather_on_the_hour = hourly_weather['TIMESTAMP'].dt.minute == 0
hourly_weather = hourly_weather.loc[weather_on_the_hour]

In [None]:
# Preview the parsed, on-the-hour weather rows.
hourly_weather.head()

In [None]:
# Load the community-garden weather CSV (path is relative to the working
# directory) and preview the first rows.
par_and_rain_data = pd.read_csv("weather_community_garden_211029-240630.csv")
par_and_rain_data.head()

In [None]:
# Parse Date.Time strings of the form MM/DD/YYYY HH:MM (no seconds field).
par_and_rain_data['Date.Time'] = pd.to_datetime(par_and_rain_data['Date.Time'], format="%m/%d/%Y %H:%M")

In [None]:
def round_up_to_half_hour(dt):
    '''
    Round a timestamp up to the next half-hour boundary (HH:00 or HH:30).

    A value that already sits exactly on a boundary is returned unchanged in
    value. Any other value, including one that is only seconds past a
    boundary (e.g. 10:00:30 or 10:30:15), moves up to the following boundary,
    rolling over to the next hour or day when needed.

    dt may be a pandas Timestamp or a datetime.datetime; the result is always
    a pandas Timestamp.
    '''
    # Timestamp.ceil takes every sub-boundary component into account, so
    # seconds and smaller units can neither be dropped nor left unrounded.
    return pd.Timestamp(dt).ceil("30min")

In [None]:
# Round each Date.Time reading in par_and_rain_data up to the next half hour,
# then rename the column to TIMESTAMP so it matches hourly_weather's key.
rounded_times = par_and_rain_data['Date.Time'].apply(round_up_to_half_hour)
par_and_rain_data['Date.Time'] = rounded_times
par_and_rain_data = par_and_rain_data.rename(columns={'Date.Time': 'TIMESTAMP'})

In [None]:
# Keep only the rows whose (rounded) timestamp falls exactly on the hour.
par_rain_on_the_hour = par_and_rain_data['TIMESTAMP'].dt.minute == 0
par_and_rain_data = par_and_rain_data.loc[par_rain_on_the_hour]

In [None]:
# Select the rows for 07/20/2023 with hour 0 through 9 (both ends inclusive)
# and rename their weather columns to the names used by hourly_weather.
target_day = pd.to_datetime('2023-07-20').date()
garden_stamps = par_and_rain_data['TIMESTAMP']
is_target_day = garden_stamps.dt.date == target_day
is_morning_hour = garden_stamps.dt.hour.between(0, 9)

garden_to_weather_names = {
    'Wind.Speed.m.s': 'WindSpeed_Avg',
    'Temp.C': 'AirT_Avg',
    'RH.percent': 'AirRH_Avg',
}
par_and_rain_data_morning = par_and_rain_data[is_target_day & is_morning_hour].rename(
    columns=garden_to_weather_names
)
par_and_rain_data_morning.head()

In [None]:
# Put the 07/20/2023 morning rows in front of the existing hourly rows and
# renumber the index from 0.
weather_columns = ['TIMESTAMP', 'WindSpeed_Avg', 'AirT_Avg', 'AirRH_Avg']
frames_in_order = [par_and_rain_data_morning[weather_columns], hourly_weather]
hourly_weather = pd.concat(frames_in_order, ignore_index=True, sort=False)

In [None]:
# Keep only the join key plus the rain and PAR columns.
par_and_rain_data = par_and_rain_data[["TIMESTAMP", "Rain.mm", "PAR.mumol.m2.s"]]

In [None]:
# Left join: every hourly_weather row is kept; rows without a matching
# TIMESTAMP get NaN in the rain and PAR columns.
# NOTE(review): if par_and_rain_data holds several rows with the same
# TIMESTAMP (possible after rounding up to the half hour), the join repeats
# the hourly row once per match -- confirm the key is unique.
hourly_weather = hourly_weather.merge(par_and_rain_data, how='left', on='TIMESTAMP')

In [None]:
# Load the 2023 pot watering CSV (path is relative to the working directory)
# and preview the first rows.
water_amount = pd.read_csv("./water amounts_pots_2023.csv")
water_amount.head()

In [None]:
# Four-digit year string parsed from the yymmdd date code (e.g. 230720 -> "2023").
water_amount['Year'] = water_amount['Date.yymmdd'].map(lambda x: datetime.strptime(str(x), "%y%m%d").strftime("%Y"))

In [None]:
# Day of year as a string without zero padding (e.g. "201"). strptime("%j")
# still validates the DOY value; the unpadded text is built from tm_yday
# rather than strftime("%-j"), because the "-" flag is a platform-specific
# extension that is not available everywhere (e.g. on Windows).
water_amount['Day'] = water_amount['DOY'].map(
    lambda doy: str(datetime.strptime(str(doy), "%j").timetuple().tm_yday)
)

In [None]:
# Zero-padded hour string ("00"-"23") taken from Time.Start; the format
# expects a literal "0:" prefix in front of HH:MM.
water_amount['Hour'] = water_amount['Time.Start'].map(lambda x: datetime.strptime(x, "0:%H:%M").strftime("%H"))

In [None]:
# Empty output frame; its columns are filled in one at a time afterwards.
new_hourly_weather = pd.DataFrame(columns=['Year', 'Day', 'Hour', 'Solar_Wm2', 'Rain_mm', 'Wind_ms.1', 'Tair_C', 'D_kPa'])

In [None]:
# Rainfall, cast to float.
new_hourly_weather['Rain_mm'] = hourly_weather['Rain.mm'].astype(float)

In [None]:
# Wind speed is copied as-is (no dtype cast, unlike the rain and air
# temperature columns).
new_hourly_weather['Wind_ms.1'] = hourly_weather['WindSpeed_Avg']

In [None]:
# Air temperature, cast to float.
new_hourly_weather['Tair_C'] = hourly_weather['AirT_Avg'].astype(float)

In [None]:
# Four-digit year string of each timestamp.
new_hourly_weather['Year'] = hourly_weather['TIMESTAMP'].map(lambda x: x.strftime("%Y"))

In [None]:
# Day of year as a string without zero padding (e.g. "201"). It is built from
# Timestamp.dayofyear rather than strftime("%-j"), because the "-" flag is a
# platform-specific extension that is not available everywhere (e.g. on
# Windows).
new_hourly_weather['Day'] = hourly_weather['TIMESTAMP'].map(lambda ts: str(ts.dayofyear))

In [None]:
# Zero-padded hour string ("00"-"23") of each timestamp.
new_hourly_weather['Hour'] = hourly_weather['TIMESTAMP'].map(lambda x: x.strftime("%H"))

In [None]:
# Preview the rain series keyed by (Year, Day, Hour); the result is only
# displayed, nothing is stored.
new_hourly_weather.set_index(['Year','Day','Hour'])['Rain_mm']

In [None]:
# Preview the irrigation series keyed by (Year, Day, Hour); the result is only
# displayed, nothing is stored.
water_amount.set_index(['Year', 'Day', 'Hour'])['irrigation.mm']

In [None]:
# Key both series by (Year, Day, Hour) so irrigation can be matched to the
# hourly rain values.
# NOTE(review): the irrigation.mm column is divided by 10 before being added
# to Rain_mm -- confirm the intended unit conversion.
water_amount_indexed = water_amount.set_index(['Year', 'Day', 'Hour'])['irrigation.mm'] / 10
new_hourly_weather_indexed = new_hourly_weather.set_index(['Year', 'Day', 'Hour'])['Rain_mm']

# Reindex water_amount_indexed to match new_hourly_weather_indexed; hours with
# no irrigation record get 0.
# NOTE(review): reindex needs unique (Year, Day, Hour) labels in
# water_amount_indexed -- confirm there is at most one record per hour.
water_amount_indexed = water_amount_indexed.reindex(new_hourly_weather_indexed.index, fill_value=0)

# Add irrigation to rain. fill_value=0 treats a value missing on one side as 0.
# The keyed index is then dropped so the result is assigned back by position.
# NOTE(review): this presumes new_hourly_weather has a default 0..n-1 index.
new_hourly_weather['Rain_mm'] = water_amount_indexed.add(new_hourly_weather_indexed, fill_value=0).reset_index(drop=True)

In [None]:
# Preview the output frame after irrigation has been added to Rain_mm.
new_hourly_weather.head()

In [None]:
# Before May 27th (day of year 147) irrigation happened twice a day: add
# 6.32 mm to the 06:00 and 18:00 rows of every earlier day.
day_number = new_hourly_weather['Day'].astype(int)
hour_number = new_hourly_weather['Hour'].astype(int)
is_watering_hour = hour_number.isin([6, 18])
new_hourly_weather.loc[(day_number < 147) & is_watering_hour, 'Rain_mm'] += 6.32

In [None]:
def calc_e_water(T):
    '''
    Saturation vapor pressure over liquid water, after Buck (1996).

    T is the temperature in C. The Buck expression yields hPa; the result is
    scaled by 0.1 and returned in kPa.
    '''
    exponent = (18.678 - (T / 234.5)) * (T / (257.14 + T))
    e_s_hpa = 6.1121 * np.exp(exponent)
    return e_s_hpa * 0.1  # 1 hPa = 0.1 kPa
def calc_e_ice(T):
    '''
    Saturation vapor pressure over ice, after Buck (1996).

    T is the temperature in C. The Buck expression yields hPa; the result is
    scaled by 0.1 and returned in kPa.
    '''
    exponent = (23.036 - (T / 333.7)) * (T / (279.824 + T))
    e_s_hpa = 6.1115 * np.exp(exponent)
    return e_s_hpa * 0.1  # 1 hPa = 0.1 kPa

In [None]:
def calc_vpd(air_temp, rh, saturation_vapor_pressure):
    '''
    Vapor pressure deficit from air temperature and relative humidity.

    VPD = e_s * (1 - RH/100)

    e_s comes from the saturation_vapor_pressure callable, which the caller
    picks according to whether T is above or below 0. The Buck (1996)
    formulations are intended here; they revise Buck (1981) and need no
    enhancement factor.

    air_temp must be in C and rh in percent. The result has the unit returned
    by saturation_vapor_pressure (kPa for the Buck helpers after adjustment).
    '''
    e_s = saturation_vapor_pressure(air_temp)
    unsaturated_fraction = 1 - (rh / 100)
    return e_s * unsaturated_fraction

In [None]:
def _row_vpd(row):
    '''
    VPD for one weather row: the water curve is used when the air temperature
    is above 0, the ice curve otherwise.
    '''
    air_temp = row['AirT_Avg']
    e_s_curve = calc_e_water if air_temp > 0 else calc_e_ice
    return calc_vpd(air_temp, row['AirRH_Avg'], e_s_curve)


# Calculate VPD in kPa, row by row
new_hourly_weather['D_kPa'] = hourly_weather.apply(_row_vpd, axis=1)

In [None]:
def convert_par_to_solar_radiation(par, umol_per_joule=4.57):
    '''
    Convert a PAR photon flux (umol m-2 s-1) to an energy flux (W m-2).

    Conversion done per:

    Reis, Mariana & Ribeiro, Aristides. (2020). Conversion factors and general
    equations applied in agricultural and forest meteorology. 27. 227-258.
    10.31062/agrom.v27i2.26527.

    The default uses the approximation 1 W m-2 ~ 4.57 umol m-2 s-1
    (Thimijan & Heins, 1983), which assumes the W m-2 refers to
    photosynthetically active radiation (PAR), i.e. the 400 to 700 nm band
    (0.4 to 0.7 um).

    The sensor used for the cottonwood data was a HOBO S-LIA-M003, which
    measures between 400 and 700 nm, so the default is appropriate. For
    sensors that do not measure in that range the original note gives a factor
    of ~2.02 instead; pass it as umol_per_joule.

    par:            photon flux in umol m-2 s-1 (scalar or array-like).
    umol_per_joule: umol of photons per joule used as the divisor.

    NOTE(review): with the default factor the result is PAR-band energy flux,
    not broadband solar radiation -- confirm that is what the consumer of the
    Solar_Wm2 column expects.
    '''
    return par / umol_per_joule

In [None]:
# Remove the sensor's odd 1.2 baseline from PAR (micromoles/m2/s), then
# convert to solar radiation in Wm^-2. Readings below 1.2 come out negative;
# nothing clips them here.
par_above_baseline = hourly_weather['PAR.mumol.m2.s'].apply(lambda x: x - 1.2)
new_hourly_weather['Solar_Wm2'] = par_above_baseline.apply(convert_par_to_solar_radiation)

In [None]:
# Keep only the experimental predrought and post drought window: day of year
# 201 through 265, both ends inclusive.
output_day_number = new_hourly_weather['Day'].astype(int)
new_hourly_weather = new_hourly_weather.loc[output_day_number.between(201, 265)]

In [None]:
# Write the result one directory up, without the DataFrame index column.
new_hourly_weather.to_csv("../dataset.csv", index=False)