# A-gs model and implementation (simulation of CO2 and H2O fluxes)

## Initialize data and model

### Setup and fetch data

In [None]:
# Notebook settings
Username = 'Beheerder'
# Years of data to load. range() excludes its stop value, so this is 2008-2018;
# the wider alternative noted previously was range(1997, 2021).
years = range(2008, 2019)

In [None]:
import os
datapath   = os.path.join('../')
print('datapath is set to %s'%datapath)

# !pip install numpy
# !pip install pandas
# !pip install matplotlib
# !pip install plotly 
# !pip install cufflinks
#!pip install colorspacious
#!pip install seaborn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import plotly.express as px
#import cufflinks as cf
import matplotlib.dates as mdate
import matplotlib.ticker as ticker
from matplotlib import cm
#from colorspacious import cspace_converter
import scipy.stats as stats
#cf.go_offline()
# cf.set_config_file(offline=False, world_readable=True)

from datetime import datetime, timedelta
import sys
sys.path.insert(0, os.path.join(datapath,'PythonScripts'))
from Loobos_Toolbox import dateparse, dateparse_Gapfilled, Read_LoobosEddFinal, Read_LooStor, Read_LoodatGapfill, Read_Loobos_halfhourly, Read_Loobos_meteo, Read_Loobos_soil, Read_Loobos_profile

from Ags_model import runAgs, calc_LE

In [None]:
# Load the datasets only once per session. 'progress' is a list of stage names
# that have already run; the readers below are skipped when 'dataloaded' is in
# it. To force a reload, remove 'dataloaded' from progress before running this
# cell.
if 'progress' not in globals():
    progress = []
if 'dataloaded' not in progress:
    # Read files
    df_EC = Read_LoobosEddFinal(years, datapath)
    df_Stor = Read_LooStor(years, datapath)
    df_Comb = Read_LoodatGapfill(years, datapath)
    df_NEE = Read_Loobos_halfhourly(years, datapath)
    df_meteo = Read_Loobos_meteo(years, datapath)
    df_soil = Read_Loobos_soil(years, datapath)
    df_profile = Read_Loobos_profile(years, datapath)
    progress.append('dataloaded')

In [None]:
#OLD filters
# Make filter for original (not gapfilled) GPP data
#I = ((df_Comb['GPP_fqc']==0)&(df_meteo['PAR']>0))

# Filter for CO2 data
#t = df_profile.index                                          
#time = (t < np.datetime64('2013-05-08')) | (t > np.datetime64('2013-06-01'))
#CO2 = (df_profile['CO2level1'] > 300)

#General filter
#I = ((df_Comb['GPP_fqc']==0)&(df_meteo['PAR']>0))

#df_meteo_CO2 = df_meteo[time][CO2]
#df_meteo_filter = df_meteo_CO2[I]

#df_Comb_CO2 = df_Comb[time][CO2]
#df_Comb_filter = df_Comb_CO2[I]

#df_profile_CO2 = df_profile[time][CO2]
#df_profile_filter = df_profile_CO2[I]

#df_EC_CO2 = df_EC[time][CO2]
#df_EC_filter = df_EC_CO2[I]

In [None]:
# Row masks used to select the model input data.

# General mask: GPP_fqc == 0 (per the original note: original, non-gapfilled
# GPP) and PAR > 0.
I = df_Comb['GPP_fqc'].eq(0) & df_meteo['PAR'].gt(0)
# A date-based exclusion (2013-05-08 to 2013-06-01) was applied here at one
# point; it is currently disabled.

# One plausibility mask per input variable
CO2 = df_profile['CO2level1'].gt(300)    # CO2 data
Locorr = df_meteo['L(o)corr'].gt(0)      # L(o)corr data
VPD = df_Comb['VPD'].ge(0)               # VPD data
Ustar = df_EC['U-star'].ge(0)            # U-star

# Combine all masks.
# NOTE(review): the name 'filter' shadows the Python builtin; it is kept
# because other cells may refer to it.
filter = I & CO2 & Locorr & VPD & Ustar

# Apply the combined mask to every frame that feeds the model:
#   df_profile -> 'CO2'
#   df_meteo   -> 'L(o)corr' and 'PAR'
#   df_Comb    -> 'VPD' and 'Tair'
#   df_EC      -> 'Mea_Windsp' and 'U-star'
df_profile_filter = df_profile[filter]
df_meteo_filter = df_meteo[filter]
df_Comb_filter = df_Comb[filter]
df_EC_filter = df_EC[filter]

### Run A-gs model

In [None]:
# Run the A-gs model once for each fstr value (1.0, 0.8, 0.6, 0.4, 0.2).
# Every run receives the same four filtered input frames.
ags_inputs = (df_profile_filter, df_Comb_filter, df_meteo_filter, df_EC_filter)

# 'ra' is rebound by every call, so the value left afterwards is the one
# returned by the last run (fstr=0.2).
an_final_10, an_umol_10, rs_10, ra = runAgs(*ags_inputs, fstr=1.0)
an_final_8, an_umol_8, rs_8, ra = runAgs(*ags_inputs, fstr=0.8)
an_final_6, an_umol_6, rs_6, ra = runAgs(*ags_inputs, fstr=0.6)
an_final_4, an_umol_4, rs_4, ra = runAgs(*ags_inputs, fstr=0.4)
an_final_2, an_umol_2, rs_2, ra = runAgs(*ags_inputs, fstr=0.2)


### Assemble dataframe 'df_ET' for simulated LE

In [None]:
# Collect the columns needed for the simulated LE into one frame.
et_inputs = [
    df_meteo['L(o)'],
    df_meteo['Te-L(o)'],
    df_profile['Pressure'],
    df_Comb['VPD'],
    df_Comb['rH'],
    df_meteo['P(mast)'],
]
df_ET = pd.concat(et_inputs, axis=1, sort=False)

# Pressure: hPa -> kPa
df_ET['p_kPa'] = df_ET['Pressure'] / 10

# VPD: some outlier values are negative; keep only positive values (others
# become NaN) and convert hPa -> kPa.
df_ET['VPD_adj'] = df_ET['VPD'].where(df_ET['VPD'] > 0) / 10

In [None]:
def init_ETframe(rs_series):
    """Build the input frame for the LE calculation of one model run.

    The notebook-level frames ``df_meteo``, ``df_profile`` and ``df_Comb``
    and the series ``ra`` are read from the global scope.

    Parameters
    ----------
    rs_series : pandas.Series
        Stomatal resistance of one run; stored as column ``rs``, aligned on
        its index.

    Returns
    -------
    pandas.DataFrame
        The six source columns plus ``p_kPa`` (``Pressure`` / 10),
        ``VPD_adj`` (positive ``VPD`` / 10, NaN elsewhere), ``rs`` and ``ra``.
    """
    source_columns = [
        df_meteo['L(o)'],
        df_meteo['Te-L(o)'],
        df_profile['Pressure'],
        df_Comb['VPD'],
        df_Comb['rH'],
        df_meteo['P(mast)'],
    ]
    frame = pd.concat(source_columns, axis=1, sort=False)

    # Pressure: hPa -> kPa
    frame['p_kPa'] = frame['Pressure'] / 10

    # Negative/zero VPD outliers become NaN; the rest is converted hPa -> kPa
    frame['VPD_adj'] = frame['VPD'].where(frame['VPD'] > 0) / 10

    frame['rs'] = rs_series
    frame['ra'] = ra
    return frame

In [None]:
# One LE input frame per model run, in the order rs_10, rs_8, rs_6, rs_4, rs_2.
df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2 = (
    init_ETframe(rs_run) for rs_run in (rs_10, rs_8, rs_6, rs_4, rs_2)
)

### Calculate ET

In [None]:
# NOTE(review): this call passes rs and ra as extra positional arguments,
# whereas the per-run calls in this notebook use calc_LE(frame) with 'rs' and
# 'ra' stored as columns. It also needs a global 'rs', which this notebook
# only assigns in a later cell (rs = rs_8). Confirm which calc_LE signature is
# current before running this cell.
df_ET=calc_LE(df_ET,rs,ra)

In [None]:
# Apply calc_LE to the frame of every model run, rebinding each name to the result.
df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2 = (
    calc_LE(run_frame)
    for run_frame in (df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2)
)

### Calculate gs from Penman-Monteith

In [None]:
# Prepare the inputs for deriving gs from the Penman-Monteith equation.
start = '2017-04-14 00:00'
end = '2017-04-30 00:00'

# Stomatal resistance used for the comparison: the A-gs run with fstr=0.8
rs = rs_8

df_ra = pd.DataFrame({'ra': ra})
df_rs = pd.DataFrame({'rs': rs})
# Conductances are the inverse of the resistances
df_ra['ga'] = 1. / df_ra['ra']   # aerodynamic conductance
df_rs['gs'] = 1. / df_rs['rs']   # stomatal conductance
# Hourly means
df_ra = df_ra.resample("1H").mean()
df_rs = df_rs.resample("1H").mean()

gs_inputs = [
    df_profile_filter['Pressure'],
    df_Comb_filter['LE'],
    df_Comb_filter['rH'],
    df_Comb_filter['Tair'],
    df_Comb_filter['VPD'],
    df_meteo_filter['P(mast)'],
    df_meteo_filter['R(net)'],
    df_meteo_filter['G1'],
]
df_gs = pd.concat(gs_inputs, axis=1, sort=False)
df_gs['Tair_K'] = df_gs['Tair'] + 273.15
# Pressure hPa -> kPa; values <= 1 kPa are set to NaN
df_gs['p_kPa'] = df_gs['Pressure'] / 10
df_gs['p_kPa'] = df_gs['p_kPa'].where(df_gs['p_kPa'] > 1)
# LE values <= -5 are set to NaN
df_gs['LE'] = df_gs['LE'].where(df_gs['LE'] > -5)
# Some outlier VPD values are negative: keep positive values only, hPa -> kPa
df_gs['VPD_adj'] = df_gs['VPD'].where(df_gs['VPD'] > 0) / 10

# Keep only rows without precipitation in the preceding 72 h.
# The rolling sum is taken over the UNFILTERED meteo record: df_gs only holds
# rows that passed the quality filter (e.g. PAR > 0), so summing its own
# 'P(mast)' column would miss rain that fell during rows the filter removed.
precip_72h = df_meteo['P(mast)'].rolling('72H').sum()
df_gs['last3day_prec'] = precip_72h.reindex(df_gs.index)
df_gs = df_gs.loc[df_gs['last3day_prec'] == 0]

In [None]:
# Specific humidity (q) and heat capacity (cp) from Tair, VPD and pressure.

# Constants
e_sat_0 = 0.6107   # kPa (= 610.7 Pa)
a = 7.5
b = 237.3          # deg C (not a typo)
Rd = 287           # J/kg K
Rv = 462           # J/kg K
cpd = 1004.67      # J/kg/K

# Saturation vapour pressure, e_sat = e_sat_0 * 10**(a*T / (b + T)), T in deg C
tair_c = df_gs['Tair']
df_gs['e_sat'] = e_sat_0 * 10**(a*tair_c / (b+tair_c))

# q = Rd/Rv * e/p. With e and p both in kPa this is a dimensionless mass ratio.
# NOTE(review): an earlier comment called this g/kg, but the quoted magnitude
# (0.005-0.015) matches kg/kg.
eps = Rd/Rv
df_gs['q_sat'] = eps * df_gs['e_sat']/df_gs['p_kPa']
# VPD = e_sat - e_act  ->  e_act = e_sat - VPD
df_gs['e_act'] = df_gs['e_sat'] - df_gs['VPD_adj']
df_gs['q_act'] = eps * df_gs['e_act']/df_gs['p_kPa']

# Heat capacity from q
df_gs['cp_calc'] = cpd*(1+0.84*df_gs['q_act'])


In [None]:
# Psychrometric constant (gamma) and slope of the saturation vapour pressure
# curve (delta), inputs for the gs-simulated vs gs-calculated comparison
# (following the research paper by Eline Floor Houwen).

rho = 1.2    # presumably air density in kg/m3 - TODO confirm
cp = 1006    # J/kg/K, approximation
Lv = 2.26    # MJ/kg, based on 2260000 J/kg
cp_calc = df_gs['cp_calc']

# gamma = cp * p / (0.622 * Lv), with cp in J/kg/K, p in Pa and Lv in J/kg.
# 'gamma' uses the constant cp, 'gamma_1' the humidity-dependent cp_calc.
# (An example value for the psychrometric constant is 0.054 kPa/degC or 50 Pa/degC.)
gamma_denominator = 0.622 * Lv*1000000
df_gs['gamma'] = (cp * df_gs['p_kPa']*1000) / gamma_denominator
df_gs['gamma_1'] = (cp_calc * df_gs['p_kPa']*1000) / gamma_denominator

# delta = 4098 * e_s(T) / (T + 237.3)**2, T in deg C
tair_offset = df_gs['Tair'] + 237.3
sat_vp = 0.6108 * np.exp((17.27*df_gs['Tair']) / tair_offset)
df_gs['delta'] = 4098 * sat_vp / tair_offset**2

In [None]:
# Derive stomatal conductance from measured LE by inverting Penman-Monteith:
#   gs = LE*gamma*ga / (delta*(Rnet-G) + rho*cp*ga*VPD - LE*(delta+gamma))
# and plot it against the A-gs model output (df_rs['gs'] = 1/rs).
LE = df_gs['LE']            # in the order of 50 W m-2, ranges 16-94
gamma = df_gs['gamma_1']    # in the order of 72 Pa/K; the equation expects kPa/K, hence the /1000 below
ga = df_ra['ga']            # in the order of 0.14, ranges 0.104-0.266
delta = df_gs['delta']      # in the order of 0.10, ranges 0.04-0.15
Rnet = df_gs['R(net)']      # in the order of 100, ranges 3-222
G = df_gs['G1']             # in the order of 0.1, ranges -3 to +3 (suspect; R(net)-G1 stays positive, 6.28-219.69)
VPD = df_gs['VPD_adj']      # in the order of 0.60, ranges 0.059-1.33

# NOTE(review): df_ra was resampled to hourly means, so 'ga' aligns with df_gs
# by timestamp; df_gs rows without a matching hourly stamp yield NaN.
first_term = df_gs['LE']*(df_gs['gamma_1']/1000)*df_ra['ga']
second_term = delta*(Rnet-G)
third_term = rho*cp_calc*ga*VPD
fourth_term = LE*(delta+(df_gs['gamma_1']/1000))

df_gs['gs'] = (first_term) / ( (second_term) + (third_term) - (fourth_term) )
df_gs['lower_half'] = (second_term) + (third_term) - (fourth_term)

# Labels fixed: df_gs['gs'] is the LE-derived (observed) conductance, while
# df_rs['gs'] is the A-gs simulation, taken from rs = rs_8 (fstr=0.8).
df_gs['gs'].plot(label="observed gs from LE (inverted Penman-Monteith)", legend=True,
                 title="simulated and observed gs", ylabel="m/s")
df_rs['gs'].plot(label="simulated gs (A-gs, fstr=0.8)", legend=True)

In [None]:
# Hourly (09-15 h) median stomatal conductance: the big-leaf estimate derived
# from measured LE (df_gs['gs'], inverted Penman-Monteith) against the A-gs
# simulation (df_rs['gs'] = 1/rs).
start = '2008-04-01 00:00'
end = '2018-08-30 00:00'

# Measured ("big-leaf") conductance: hourly means, restricted to hours with a
# zero 72 h precipitation sum and to the months May-August.
measured_data = df_gs.loc[start:end, ['gs', 'P(mast)']].resample('1H').mean()
measured_data['last3day_prec'] = measured_data['P(mast)'].rolling('72H').sum()
measured_data = measured_data.loc[measured_data['last3day_prec'] == 0.0]
measured_data = measured_data.loc[(measured_data.index.month >= 5) & (measured_data.index.month <= 8)]
measured_data = measured_data.dropna()  # drop hours left empty by the resample

# Simulated (A-gs) conductance: hourly means.
sim_data = df_rs['gs'].loc[start:end].resample('1H').mean()
sim_data = sim_data.dropna()

# Inner join on the timestamp. Both sides carry a 'gs' column, so pandas
# suffixes them: gs_x = measured (left side), gs_y = simulated (right side).
df_tmp = measured_data.merge(sim_data, how='inner', left_index=True, right_index=True)

hours_list = [9, 10, 11, 12, 13, 14, 15]

# Median and quartiles per hour of day for both series
d = {}
for prefix, column in (('meas', 'gs_x'), ('sim', 'gs_y')):
    per_hour = [df_tmp.loc[df_tmp.index.hour == hour, column] for hour in hours_list]
    d[prefix + '_avg'] = [values.median() for values in per_hour]
    d[prefix + '_q1'] = [values.quantile(q=0.25) for values in per_hour]
    d[prefix + '_q3'] = [values.quantile(q=0.75) for values in per_hour]

df_avg = pd.DataFrame(d)

fig, ax = plt.subplots()
p1 = ax.plot(hours_list, df_avg['meas_avg'], marker='o', label=r'median measured ("big-leaf") $g_{s_{H_{2}O}}$')
ax.fill_between(hours_list, df_avg['meas_q1'], df_avg['meas_q3'], alpha=0.1, label='IQR')
p2 = ax.plot(hours_list, df_avg['sim_avg'], marker='s', label=r'median simulated (A-gs) $g_{s_{H_{2}O}}$')
ax.fill_between(hours_list, df_avg['sim_q1'], df_avg['sim_q3'], alpha=0.1, label='IQR')

ax.set_ylim(0.004, 0.02)
plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'$g_{s_{H_{2}O}} [m s^{-1}]$')
# The month filter above keeps May-August, so the title says so.
fig.suptitle('Hourly median values for stomatal conductance of water vapor \n (2008-2018 during growth season months: May-Aug, for precip 3day sum = 0)')
ax.legend(loc='upper right')


In [None]:
# Hourly (09-15 h) mean stomatal conductance: the big-leaf estimate derived
# from measured LE (df_gs['gs'], inverted Penman-Monteith) against the A-gs
# simulation (df_rs['gs'] = 1/rs). The shaded bands are the interquartile range.
start = '2008-04-01 00:00'
end = '2018-08-30 00:00'

# Measured ("big-leaf") conductance: hourly means, restricted to hours with a
# zero 72 h precipitation sum and to the months May-August.
measured_data = df_gs.loc[start:end, ['gs', 'P(mast)']].resample('1H').mean()
measured_data['last3day_prec'] = measured_data['P(mast)'].rolling('72H').sum()
measured_data = measured_data.loc[measured_data['last3day_prec'] == 0.0]
measured_data = measured_data.loc[(measured_data.index.month >= 5) & (measured_data.index.month <= 8)]
measured_data = measured_data.dropna()  # drop hours left empty by the resample

# Simulated (A-gs) conductance: hourly means.
sim_data = df_rs['gs'].loc[start:end].resample('1H').mean()
sim_data = sim_data.dropna()

# Inner join on the timestamp. Both sides carry a 'gs' column, so pandas
# suffixes them: gs_x = measured (left side), gs_y = simulated (right side).
df_tmp = measured_data.merge(sim_data, how='inner', left_index=True, right_index=True)

hours_list = [9, 10, 11, 12, 13, 14, 15]

# Mean and quartiles per hour of day for both series
d = {}
for prefix, column in (('meas', 'gs_x'), ('sim', 'gs_y')):
    per_hour = [df_tmp.loc[df_tmp.index.hour == hour, column] for hour in hours_list]
    d[prefix + '_avg'] = [values.mean() for values in per_hour]
    d[prefix + '_q1'] = [values.quantile(q=0.25) for values in per_hour]
    d[prefix + '_q3'] = [values.quantile(q=0.75) for values in per_hour]

df_avg = pd.DataFrame(d)

fig, ax = plt.subplots()
# The plotted statistic is the mean, so the legend says "mean"
p1 = ax.plot(hours_list, df_avg['meas_avg'], marker='o', label=r'mean measured $g_{s_{H_{2}O}}$')
ax.fill_between(hours_list, df_avg['meas_q1'], df_avg['meas_q3'], alpha=0.1, label='IQR')
p2 = ax.plot(hours_list, df_avg['sim_avg'], marker='s', label=r'mean simulated $g_{s_{H_{2}O}}$')
ax.fill_between(hours_list, df_avg['sim_q1'], df_avg['sim_q3'], alpha=0.1, label='IQR')

ax.set_ylim(0.004, 0.02)
plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'$g_{s_{H_{2}O}} [m s^{-1}]$')
# The month filter above keeps May-August, so the title says so.
fig.suptitle('Hourly mean values for stomatal conductance of water vapor \n (2008-2018 during growth season months: May-Aug, for precip 3day sum = 0)')
ax.legend(loc='upper right')
df_avg


In [None]:
# Histogram of the 'gs_x' values (the df_gs-derived side of the merge) at 10 h
at_ten = df_tmp.index.hour == 10
df_tmp.loc[at_ten, 'gs_x'].plot.hist()


In [None]:
# Store the four terms of the inverted Penman-Monteith expression as columns
# of df_gs so they can be inspected individually.
gamma_kPa = df_gs['gamma_1'] / 1000
df_gs['first_term'] = df_gs['LE'] * gamma_kPa * df_ra['ga']
df_gs['second_term'] = delta * (Rnet - G)
df_gs['third_term'] = rho * cp_calc * ga * VPD
df_gs['fourth_term'] = LE * (delta + gamma_kPa)

In [None]:
# Show the df_gs rows between 2017-11-30 and 2018-02-28
start, end = '2017-11-30', '2018-02-28'
df_gs.loc[start:end]

In [None]:
# Show the column labels of df_gs (the two commented lines are alternative
# inspections that are currently disabled).
#df_gs['lower_half'].plot()
df_gs.columns
#df_ra.loc[start:end,'ga']

In [None]:
# Soil-moisture columns for 2017-01-01 to 2017-12-30 in one figure.
# Plotting the columns together (not five separate Series.plot() calls)
# gives every line a legend entry, so the series can be told apart.
soil_moisture_cols = ['SM-Lit', 'SM-003', 'SM-020', 'SM-050', 'SM-100']
df_soil.loc['2017-01-01':'2017-12-30', soil_moisture_cols].plot()

In [None]:
# Soil-moisture columns for 2017-04-17 to 2017-04-22 in one figure.
# Plotting the columns together (not five separate Series.plot() calls)
# gives every line a legend entry, so the series can be told apart.
soil_moisture_cols = ['SM-Lit', 'SM-003', 'SM-020', 'SM-050', 'SM-100']
df_soil.loc['2017-04-17':'2017-04-22', soil_moisture_cols].plot()