# Create Lagged Variables for Tmax, and VPD 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

In [2]:
# Get a site
site_subset = pd.read_csv("../DATASETS/Sites_Subset_20231010/ausplots_site_info/sites_subset.csv")
site_subset.head()

Unnamed: 0.1,Unnamed: 0,site_unique,site_location_name,established_date,description,bioregion_name,landform_pattern,landform_element,site_slope,site_aspect,...,pit_marker_location_method,soil_observation_type,a_s_c,plot_is_100m_by_100m,plot_is_aligned_to_grid,plot_is_permanently_marked,latitude,longitude,point,state
0,189,WAAPIL0003-57601,WAAPIL0003,2015-08-30T11:01:10,Millstream Chichester National Park. 200m east...,PIL,RIS,HSL,2.0,270.0,...,DGPS,P,,True,True,True,-21.541103,117.057006,SW,WA
1,226,NSABHC0023-57099,NSABHC0023,2014-04-21T00:00:00,Langawirra Station. 11.3km north of homestead....,BHC,PLA,PLA,0.0,,...,,P,,True,True,True,-31.34395,142.136553,SW,NSW
2,300,TCATCH0006-58807,TCATCH0006,2020-02-06T15:29:07,"Liawenee Moor. 5.4km north west of Miena, 7.5k...",TCH,PLA,PLA,1.0,9.0,...,TABLET,P,4 FE? AA AF? CD BFMOU (revised edition),True,True,True,-41.965805,146.652027,SW,TAS
3,331,WAAGAS0002-58421,WAAGAS0002,2016-08-08T15:32:20,"South Side of track, 10.1km west of abandoned ...",GAS,FLO,FAN,0.0,,...,GPS,PA,,True,True,True,-23.3847,120.479139,SW,WA
4,364,NSAMDD0014-56963,NSAMDD0014,2014-06-04T00:00:00,Scotia Sanctuary. 8kms north of homestead 131 ...,MDD,DUN,DUS,1.0,1.0,...,GPS,P,,True,True,True,-33.167939,141.191817,SW,NSW


In [3]:
## Lets try WAAPIL0003
site_location_name = 'WAAPIL0003'

In [4]:
file_name = f'{site_location_name}_1987_2022.csv'
# tmin
tmin = pd.read_csv(f'../DATASETS/Climate_Gridded/tmin/{file_name}', usecols = ['tmin', 'time'], parse_dates = ['time']).copy()

# tmax 
tmax = pd.read_csv(f'../DATASETS/Climate_Gridded/tmax/{file_name}', usecols = ['tmax', 'time'], parse_dates = ['time']).copy()

# vapourpres_09
vapourpres_h09 = pd.read_csv(f'../DATASETS/Climate_Gridded/vapourpres_h09/{file_name}', usecols = ['vapourpres_h09', 'time'], parse_dates = ['time']).copy()
vapourpres_h09['vapourpres_h09'] = vapourpres_h09['vapourpres_h09']/10 # divide by 10 to get kPa

# vapourpres_15
vapourpres_h15 = pd.read_csv(f'../DATASETS/Climate_Gridded/vapourpres_h15/{file_name}', usecols = ['vapourpres_h15', 'time'], parse_dates = ['time']).copy()
vapourpres_h15['vapourpres_h15'] = vapourpres_h15['vapourpres_h15']/10 # divide by 10 to get kPa


In [5]:
climate_data = tmin.copy().merge(tmax, left_on = 'time', right_on = 'time').merge(vapourpres_h09, left_on = 'time', right_on = 'time').\
    merge(vapourpres_h15, left_on = 'time', right_on = 'time')
climate_data = climate_data.sort_values('time')

In [6]:
climate_data.head()

Unnamed: 0,tmin,time,tmax,vapourpres_h09,vapourpres_h15
0,26.447266,1987-01-01,43.898438,0.565137,1.072461
1,26.397461,1987-01-02,44.227539,2.220117,1.123535
2,28.107422,1987-01-03,41.057617,2.29209,1.829492
3,26.827148,1987-01-04,40.477539,1.71709,1.769531
4,24.216797,1987-01-05,39.887695,2.181055,1.602539


In [7]:
climate_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13149 entries, 0 to 13148
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   tmin            13149 non-null  float64       
 1   time            13149 non-null  datetime64[ns]
 2   tmax            13149 non-null  float64       
 3   vapourpres_h09  13149 non-null  float64       
 4   vapourpres_h15  13149 non-null  float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 513.8 KB


In [8]:
# The coefficients
a = 611.2
b = 17.62
c = 243.12

In [9]:
Esat_9am = a*np.exp(
    (b * climate_data['tmin'])/
    (c + climate_data['tmin'])
)/1000

In [10]:
# Now Calculate VPD at 9am
VPD_9am = Esat_9am - climate_data['vapourpres_h09']

In [11]:
Esat_3pm = a*np.exp(
    (b * climate_data['tmax'])/
    (c + climate_data['tmax'])
)/1000

In [12]:
VPD_3pm = Esat_3pm - climate_data['vapourpres_h15']

In [13]:
climate_data['VPD'] = (VPD_9am + VPD_3pm)/2

Now applying Page (2023)'s timescales to temperature of air to VPD.

In [14]:
input_data_WAAPIL0003 = pd.read_csv('Input_DataSet_WAAPIL0003.csv', parse_dates = ['time']).copy()
input_data_WAAPIL0003 = input_data_WAAPIL0003.sort_values('time')
input_data_WAAPIL0003.set_index('time', inplace= True)
input_data_WAAPIL0003.head()

Input_Data = input_data_WAAPIL0003.copy()

In [15]:
Input_Data

Unnamed: 0_level_0,Unnamed: 0,x,y,bs,pv,npv,ue,spatial_ref,pv_filter,npv_filter,...,tmin_rolling,vapourpres_h09,vapourpres_h09_cv,vapourpres_h09_sum,vapourpres_h09_rolling,vapourpres_h15,vapourpres_h15_cv,vapourpres_h15_sum,vapourpres_h15_rolling,VPD
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1987-09-04,1.000000,505945.0,7618115.0,39.206612,8.181818,51.264463,10.231405,28350.0,8.345991,51.146632,...,,5.143250,0.285386,102.324097,,6.126770,0.275282,88.736328,,2.587931
1987-09-20,1.500000,505945.0,7618115.0,38.698347,8.665289,51.264463,10.041322,28350.0,8.751850,51.261804,...,,11.322510,0.285386,102.324097,,9.809082,0.275282,88.736328,,2.536063
1987-10-06,2.000000,505945.0,7618115.0,38.190083,9.148760,51.264463,9.851240,28350.0,8.373091,52.142329,...,,14.288513,0.285386,102.324097,,11.612854,0.275282,88.736328,,2.166326
1987-10-22,3.000000,505945.0,7618115.0,35.768595,7.611570,55.190083,9.776860,28350.0,7.701949,53.320103,...,,12.349548,0.285386,102.324097,,9.201721,0.275282,88.736328,,2.977671
1987-11-07,3.500000,505945.0,7618115.0,37.479339,6.830579,54.165289,9.913223,28350.0,7.114724,54.437294,...,,13.974487,0.285386,102.324097,,11.554993,0.275282,88.736328,,2.958083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-11,967.333333,505945.0,7618115.0,30.674931,17.851240,49.867769,11.380165,28350.0,17.441988,50.530132,...,,,,,,,,,,
2023-02-27,967.666667,505945.0,7618115.0,31.011019,17.909091,49.462810,11.404959,28350.0,18.384564,50.843905,...,,,,,,,,,,
2023-03-15,968.000000,505945.0,7618115.0,31.347107,17.966942,49.057851,11.429752,28350.0,18.501131,51.677140,...,,,,,,,,,,
2023-03-31,969.000000,505945.0,7618115.0,19.892562,16.454545,61.975207,11.735537,28350.0,17.348715,53.114766,...,,,,,,,,,,


In [16]:
VPD = climate_data[['time', 'VPD']]
VPD_index = VPD.index[VPD.index == Input_Data.index[0]]

In [17]:
Input_Data.index[0]

Timestamp('1987-09-04 00:00:00')

In [18]:
# Specify time ranges: 
ts_0 = [1,1]
ts_1 = [2, 7]
ts_2 = [8, 14]
ts_3 = [15, 30]
ts_all = [ts_0, ts_1, ts_2, ts_3]

In [19]:
# set columns of precip
VPD_col_names = ['VPD_lag', 'VPD_7', 'VPD_14', 'VPD_30']
Input_Data = Input_Data.reindex(axis = 1, labels = Input_Data.columns.tolist() + VPD_col_names).copy() # generate new columns

In [20]:
for input_date in range(len(Input_Data)):
    # Get associated index in daily precip data 
    VPD_index = VPD.index[VPD.time == Input_Data.index[input_date]]
    VPD_vars = [] 
    
    if VPD_index.empty: # when there are no longer a match (meaning that the FC data overextends the precip), break
        break
    
    for ts in ts_all:

        upper = (VPD_index - ts[1])[0]
        lower = (VPD_index - ts[0] + 1)[0]
        
        if upper >= 0: # check if the upperbounds goes above allowable index 

            print(ts)
            temp = VPD.iloc[upper:lower] # grab the range 
            temp_sum = temp[temp.select_dtypes(include = ["float64"]).columns.tolist()].sum() # calc sum
            print(temp)
            print(len(temp))
        else: # if not in allowable index, set to na 
            temp_sum = pd.NA
        print(temp_sum)
        VPD_vars.append(temp_sum)
        
        # append precip column wise for a particular row 
    for i in range(len(VPD_col_names)):
        Input_Data.loc[Input_Data.index[input_date], VPD_col_names[i]]= VPD_col_names[i]

[1, 1]
          time       VPD
245 1987-09-03  2.305124
1
VPD    2.305124
dtype: float64
[2, 7]
          time       VPD
239 1987-08-28  1.850887
240 1987-08-29  2.270785
241 1987-08-30  1.937995
242 1987-08-31  1.701711
243 1987-09-01  1.481925
244 1987-09-02  2.016483
6
VPD    11.259785
dtype: float64
[8, 14]
          time       VPD
232 1987-08-21  1.516515
233 1987-08-22  2.028276
234 1987-08-23  1.736446
235 1987-08-24  2.063085
236 1987-08-25  1.538651
237 1987-08-26  1.708977
238 1987-08-27  1.535571
7
VPD    12.127521
dtype: float64
[15, 30]
          time       VPD
216 1987-08-05  2.107938
217 1987-08-06  2.307363
218 1987-08-07  2.178207
219 1987-08-08  2.185139
220 1987-08-09  2.279240
221 1987-08-10  2.062058
222 1987-08-11  2.025795
223 1987-08-12  1.610026
224 1987-08-13  1.371426
225 1987-08-14  1.832194
226 1987-08-15  2.177367
227 1987-08-16  2.231848
228 1987-08-17  2.490262
229 1987-08-18  2.393992
230 1987-08-19  2.113065
231 1987-08-20  2.265861
16
VPD    33.63178

In [21]:
VPD.index[VPD.index == Input_Data.index[input_date]]

Index([], dtype='int64')

In [22]:
Input_Data

Unnamed: 0_level_0,Unnamed: 0,x,y,bs,pv,npv,ue,spatial_ref,pv_filter,npv_filter,...,vapourpres_h09_rolling,vapourpres_h15,vapourpres_h15_cv,vapourpres_h15_sum,vapourpres_h15_rolling,VPD,VPD_lag,VPD_7,VPD_14,VPD_30
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1987-09-04,1.000000,505945.0,7618115.0,39.206612,8.181818,51.264463,10.231405,28350.0,8.345991,51.146632,...,,6.126770,0.275282,88.736328,,2.587931,VPD_lag,VPD_7,VPD_14,VPD_30
1987-09-20,1.500000,505945.0,7618115.0,38.698347,8.665289,51.264463,10.041322,28350.0,8.751850,51.261804,...,,9.809082,0.275282,88.736328,,2.536063,VPD_lag,VPD_7,VPD_14,VPD_30
1987-10-06,2.000000,505945.0,7618115.0,38.190083,9.148760,51.264463,9.851240,28350.0,8.373091,52.142329,...,,11.612854,0.275282,88.736328,,2.166326,VPD_lag,VPD_7,VPD_14,VPD_30
1987-10-22,3.000000,505945.0,7618115.0,35.768595,7.611570,55.190083,9.776860,28350.0,7.701949,53.320103,...,,9.201721,0.275282,88.736328,,2.977671,VPD_lag,VPD_7,VPD_14,VPD_30
1987-11-07,3.500000,505945.0,7618115.0,37.479339,6.830579,54.165289,9.913223,28350.0,7.114724,54.437294,...,,11.554993,0.275282,88.736328,,2.958083,VPD_lag,VPD_7,VPD_14,VPD_30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-11,967.333333,505945.0,7618115.0,30.674931,17.851240,49.867769,11.380165,28350.0,17.441988,50.530132,...,,,,,,,,,,
2023-02-27,967.666667,505945.0,7618115.0,31.011019,17.909091,49.462810,11.404959,28350.0,18.384564,50.843905,...,,,,,,,,,,
2023-03-15,968.000000,505945.0,7618115.0,31.347107,17.966942,49.057851,11.429752,28350.0,18.501131,51.677140,...,,,,,,,,,,
2023-03-31,969.000000,505945.0,7618115.0,19.892562,16.454545,61.975207,11.735537,28350.0,17.348715,53.114766,...,,,,,,,,,,


In [23]:
VPD_index 

Index([], dtype='int64')

In [24]:
VPD_index

Index([], dtype='int64')

In [25]:
VPD_index = VPD.index[VPD.time == (Input_Data.index[0])]

In [26]:
VPD.iloc[VPD_index-1]

Unnamed: 0,time,VPD
245,1987-09-03,2.305124


In [27]:
temp[temp.select_dtypes(include = ["float64"]).columns.tolist()[0]].sum()

60.634117251999044