# A-gs model and implementation (simulation CO2 and H2O flux)

## Initialize data and model

### Setup and fetch data

In [None]:
# Settings
Username = 'Beheerder'
# Years to download. range() excludes its stop value, so this is 2001 through 2019.
# The span (1997, 2021) is kept here as a noted alternative.
years = range(2001, 2020)

In [None]:
import os
datapath   = os.path.join('../')
print('datapath is set to %s'%datapath)

# !pip install numpy
# !pip install pandas
# !pip install matplotlib
# !pip install plotly 
# !pip install cufflinks
#!pip install colorspacious
#!pip install seaborn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import plotly.express as px
#import cufflinks as cf
import matplotlib.dates as mdate
import matplotlib.ticker as ticker
from matplotlib import cm
#from colorspacious import cspace_converter
import scipy.stats as stats
#cf.go_offline()
# cf.set_config_file(offline=False, world_readable=True)

from datetime import datetime, timedelta
import sys
sys.path.insert(0, os.path.join(datapath,'PythonScripts'))
from Loobos_Toolbox import dateparse, dateparse_Gapfilled, Read_LoobosEddFinal, Read_LooStor, Read_LoodatGapfill, Read_Loobos_halfhourly, Read_Loobos_meteo, Read_Loobos_soil, Read_Loobos_profile

from Ags_model import runAgs, runAgs2, calc_LE

In [None]:
# The two guards below stop the data files from being re-read each time this cell runs.
# To force a reload, comment the guard lines out.
if 'progress' not in globals():
    progress = []
if 'dataloaded' not in progress:
    # Read files (each reader comes from Loobos_Toolbox and takes the years and the data path)
    df_EC = Read_LoobosEddFinal(years, datapath)
    df_Stor = Read_LooStor(years, datapath)
    df_Comb = Read_LoodatGapfill(years, datapath)
    df_NEE = Read_Loobos_halfhourly(years, datapath)
    df_meteo = Read_Loobos_meteo(years, datapath)
    df_soil = Read_Loobos_soil(years, datapath)
    df_profile = Read_Loobos_profile(years, datapath)
    # Record that loading finished so later runs of this cell skip it
    progress.append('dataloaded')

In [None]:
# Row filter: keep original (not gap-filled) GPP records with PAR above zero
# General filters
I = df_Comb['GPP_fqc'].eq(0) & df_meteo['PAR'].gt(0)
# Optional time-window exclusion, currently not applied:
#t = df_profile.index
#time = (t < np.datetime64('2013-05-08')) | (t > np.datetime64('2013-06-01'))

# CO2 data: keep rows with CO2level1 above 300
CO2 = df_profile['CO2level1'].gt(300)

# L(o)corr data: keep strictly positive values
Locorr = df_meteo['L(o)corr'].gt(0)

# VPD data: keep non-negative values
VPD = df_Comb['VPD'].ge(0)

# U-star: keep non-negative values
Ustar = df_EC['U-star'].ge(0)

# All conditions must hold at once.
# NOTE(review): the name `filter` shadows the Python builtin; it is kept because
# other cells may refer to it.
filter = I & CO2 & Locorr & VPD & Ustar

# Apply the same combined mask to every input frame so their rows stay in step.
# 'CO2level1' is taken from df_profile
df_profile_filter = df_profile[filter]

# 'L(o)corr' and 'PAR' are taken from df_meteo
df_meteo_filter = df_meteo[filter]

# 'VPD' and 'Tair' are taken from df_Comb
df_Comb_filter = df_Comb[filter]

# 'Mea_Windsp' and 'U-star' are taken from df_EC
df_EC_filter = df_EC[filter]

### Run A-gs model

In [None]:
# Run A-gs model on the filtered inputs; fstr is passed through to runAgs2 unchanged
fstr = 0.8
an_final, an_umol, rs, ra, Ts_C = runAgs2(
    df_profile_filter,
    df_Comb_filter,
    df_meteo_filter,
    df_EC_filter,
    fstr,
)

## Calculate ET

### Assemble dataframe 'df_ET' that will hold output and fill with inputs

In [None]:
# Gather the input columns for the ET calculation side by side on the shared time index
et_inputs = [
    df_meteo['L(o)'],
    df_meteo['Te-L(o)'],
    df_profile['Pressure'],
    df_Comb['VPD'],
    df_Comb['rH'],
    df_meteo['P(mast)'],
]
df_ET = pd.concat(et_inputs, axis=1, sort=False)
# Pressure: hPa -> kPa
df_ET['p_kPa'] = df_ET['Pressure'] / 10
# Some outlier VPD values are negative; only strictly positive values are carried over
# (all other rows become NaN). VPD from df_Comb is in hPa, so hPa/10 = kPa.
positive_vpd = df_ET['VPD'].loc[df_ET['VPD'] > 0]
df_ET['VPD_adj'] = positive_vpd / 10

### calculate ET

In [None]:
def init_ETframe(rs_series, ra_series=None):
    """Assemble the input frame for the ET/LE calculation.

    Collects columns from the notebook-level frames ``df_meteo``,
    ``df_profile`` and ``df_Comb``, derives pressure and VPD in kPa, and
    attaches the two resistance series as columns ``rs`` and ``ra``.

    Parameters
    ----------
    rs_series : pandas.Series
        Values for the ``rs`` column (presumably the surface resistance
        returned by the A-gs run -- confirm against the caller). Aligned to
        the frame on its index; timestamps missing from it become NaN.
    ra_series : pandas.Series, optional
        Values for the ``ra`` column. When omitted, the notebook-level
        variable ``ra`` is used, which is what this function always did
        before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns ``L(o)``, ``Te-L(o)``, ``Pressure``, ``VPD``, ``rH``,
        ``P(mast)``, ``p_kPa``, ``VPD_adj``, ``rs`` and ``ra``.
    """
    if ra_series is None:
        # Backward-compatible default: fall back to the global from the model run
        ra_series = ra

    df_ET = pd.concat(
        [
            df_meteo['L(o)'],
            df_meteo['Te-L(o)'],
            df_profile['Pressure'],
            df_Comb['VPD'],
            df_Comb['rH'],
            df_meteo['P(mast)'],
        ],
        axis=1,
        sort=False,
    )
    # Pressure: hPa -> kPa
    df_ET['p_kPa'] = df_ET['Pressure'] / 10
    # Some outlier VPD values are negative: keep only strictly positive values
    # (others become NaN). VPD from df_Comb is in hPa, so hPa/10 = kPa.
    df_ET['VPD_adj'] = df_ET['VPD'].where(df_ET['VPD'] > 0) / 10
    # Plain Series assignment aligns on the index, so no to_frame() detour is needed
    df_ET['rs'] = rs_series
    df_ET['ra'] = ra_series
    return df_ET

In [None]:
# Rebuild df_ET through init_ETframe, attaching rs from the runAgs2 call (ra is read inside the helper)
df_ET=init_ETframe(rs)

In [None]:
# calc_LE is imported from Ags_model; later cells read 'ET_VPD' (and 'T_sfc_C') from its result,
# so it presumably adds those columns -- TODO confirm.
# NOTE(review): the sensitivity cell further down calls calc_LE with three arguments; check the signature.
df_ET=calc_LE(df_ET)

## Correlation plots

### CO2 flux

In [None]:
# Full analysis window, the sub-period overlaid as black crosses, and the time-of-day range
start = '2008-05-01 00:00'
end = '2018-08-30 00:00'
st = '2018-05-01 00:00'
ed = '2018-08-30 00:00'
time_s = "9:00"
time_e = "18:00"

# 3-hourly means of each driver, limited to bins stamped between time_s and time_e.
# dropna() discards the bins that came out as NaN.
sim_data_VPD = df_Comb_filter.loc[start:end, 'VPD'].resample('3H').mean().between_time(time_s, time_e).dropna()
sim_data_CO2 = df_profile_filter.loc[start:end, 'CO2level1'].resample('3H').mean().between_time(time_s, time_e).dropna()
sim_data_PAR = df_meteo_filter.loc[start:end, 'PAR'].resample('3H').mean().between_time(time_s, time_e).dropna()

# Model outputs are given explicit names so merge() yields usable column labels
Ts_data = Ts_C.loc[start:end].resample('3H').mean().between_time(time_s, time_e).rename('Ts_data').dropna()
an_data = an_umol.loc[start:end].resample('3H').mean().between_time(time_s, time_e).dropna().rename('an_data')

# One frame per panel: An paired with one driver on the timestamps both have
df_tmp1 = an_data.to_frame().merge(sim_data_VPD, how='inner', left_index=True, right_index=True)
df_tmp2 = an_data.to_frame().merge(sim_data_CO2, how='inner', left_index=True, right_index=True)
df_tmp3 = an_data.to_frame().merge(sim_data_PAR, how='inner', left_index=True, right_index=True)
df_tmp4 = an_data.to_frame().merge(Ts_data, how='inner', left_index=True, right_index=True)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
fig.suptitle('Sensitivity of An to VPD, CO2, PAR, and Ts. \n (2008-2017 during growth seasons May-Oct, 3-hour averages for 9:00-18:00)')

# Each panel: dots coloured by hour of day, black crosses for the st..ed sub-period
p1 = ax1.scatter(df_tmp1['VPD'], df_tmp1['an_data'], marker='o', c=df_tmp1.index.hour, cmap='viridis')
p1a = ax1.scatter(df_tmp1.loc[st:ed, 'VPD'], df_tmp1.loc[st:ed, 'an_data'], marker='x', c='black')
ax1.set(ylim=(0, 35), xlim=(-2, 40), xlabel='VPD [hPa]', ylabel=r'An [$\mu molm^{-2}s^{-1}$]', title='VPD')

p2 = ax2.scatter(df_tmp2['CO2level1'], df_tmp2['an_data'], marker='o', c=df_tmp2.index.hour, cmap='viridis')
p2a = ax2.scatter(df_tmp2.loc[st:ed, 'CO2level1'], df_tmp2.loc[st:ed, 'an_data'], marker='x', c='black', label='2018')
ax2.set(ylim=(0, 35), xlim=(370, 500), xlabel='CO2 [ppm]', ylabel=r'An [$\mu molm^{-2}s^{-1}$]', title='CO2')

p3 = ax3.scatter(df_tmp3['PAR'], df_tmp3['an_data'], marker='o', c=df_tmp3.index.hour, cmap='viridis')
p3a = ax3.scatter(df_tmp3.loc[st:ed, 'PAR'], df_tmp3.loc[st:ed, 'an_data'], marker='x', c='black')
ax3.set(ylim=(0, 35), xlabel=r'PAR [Wm-2]', ylabel=r'An [$\mu molm^{-2}s^{-1}$]', title='PAR')

p4 = ax4.scatter(df_tmp4['Ts_data'], df_tmp4['an_data'], marker='o', c=df_tmp4.index.hour, cmap='viridis')
p4a = ax4.scatter(df_tmp4.loc[st:ed, 'Ts_data'], df_tmp4.loc[st:ed, 'an_data'], marker='x', c='black')
ax4.set(xlabel=r'Tsurface [deg C]', ylabel=r'An [$\mu molm^{-2}s^{-1}$]', title='Ts')

# Only panel 2 carries a labelled series, so the legend goes there
ax2.legend()

fig.tight_layout()

# Free the right-hand strip of the figure for a shared hour-of-day colourbar
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(p1, cax=cbar_ax)
cbar_ax.set_ylabel('Hour')

In [None]:
# Sensitivity of simulated An to each driver, all hours of the day (3-hour means)
start = '2008-05-01 00:00'
end = '2018-08-30 00:00'

# 3-hourly means of each driver; dropna() discards the bins that came out as NaN
sim_data_VPD = df_Comb_filter.loc[start:end, 'VPD'].resample('3H').mean().dropna()
sim_data_CO2 = df_profile_filter.loc[start:end, 'CO2level1'].resample('3H').mean().dropna()
sim_data_PAR = df_meteo_filter.loc[start:end, 'PAR'].resample('3H').mean().dropna()

# Model outputs are given explicit names so merge() yields usable column labels.
# (Ts_data used to be resampled to 3H twice; the second pass changed nothing and is gone.)
Ts_data = Ts_C.loc[start:end].resample('3H').mean().rename('Ts_data').dropna()
an_data = an_umol.loc[start:end].resample('3H').mean().dropna().rename('an_data')

# One frame per panel: An paired with one driver on the timestamps both have.
# The frames can differ in length because each driver has its own gaps.
df_tmp1 = an_data.to_frame().merge(sim_data_VPD, how='inner', left_index=True, right_index=True)
df_tmp2 = an_data.to_frame().merge(sim_data_CO2, how='inner', left_index=True, right_index=True)
df_tmp3 = an_data.to_frame().merge(sim_data_PAR, how='inner', left_index=True, right_index=True)
df_tmp4 = an_data.to_frame().merge(Ts_data, how='inner', left_index=True, right_index=True)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
fig.suptitle('Sensitivity of GPP to VPD, CO2, PAR, and T_surface. \n (2001-2020 during growth seasons Apr-Sep, 3-hour averages)')

# Colour every panel by the hour of ITS OWN frame. Taking the hours from df_tmp1 for
# all panels mis-colours the points, or fails outright when the frame lengths differ.
p1 = ax1.scatter(df_tmp1['VPD'], df_tmp1['an_data'], marker='o', c=df_tmp1.index.hour, s=0.7, cmap='viridis')
ax1.set_ylim(0, 35)
ax1.set_xlim(-2, 40)
ax1.set_xlabel('VPD [hPa]')
ax1.set_ylabel(r'An [$\mu molm^{-2}s^{-1}$]')
ax1.set_title('VPD')

p2 = ax2.scatter(df_tmp2['CO2level1'], df_tmp2['an_data'], marker='o', c=df_tmp2.index.hour, s=0.7, cmap='viridis')
ax2.set_ylim(0, 35)
ax2.set_xlim(370, 500)
ax2.set_xlabel('CO2 [ppm]')
ax2.set_ylabel(r'An [$\mu molm^{-2}s^{-1}$]')
ax2.set_title('CO2')

p3 = ax3.scatter(df_tmp3['PAR'], df_tmp3['an_data'], marker='o', c=df_tmp3.index.hour, s=0.7, cmap='viridis')
ax3.set_ylim(0, 35)
ax3.set_xlabel(r'PAR [Wm-2]')
ax3.set_ylabel(r'An [$\mu molm^{-2}s^{-1}$]')
ax3.set_title('PAR')

p4 = ax4.scatter(df_tmp4['Ts_data'], df_tmp4['an_data'], marker='o', c=df_tmp4.index.hour, s=0.7, cmap='viridis')
ax4.set_xlabel(r'T_surface [deg C]')
ax4.set_ylabel(r'An [$\mu molm^{-2}s^{-1}$]')
ax4.set_title('Ts')

fig.tight_layout()

# Free the right-hand strip of the figure for a shared hour-of-day colourbar
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(p1, cax=cbar_ax)
cbar_ax.set_ylabel('Hour')

### H2O

In [None]:
# Sensitivity of simulated LE to each driver, all hours of the day (3-hour means)
start = '2008-05-01 00:00'
end = '2018-08-30 00:00'

# 3-hourly means of each driver; dropna() discards the bins that came out as NaN
sim_data_VPD = df_Comb_filter.loc[start:end, 'VPD'].resample('3H').mean().dropna()
sim_data_CO2 = df_profile_filter.loc[start:end, 'CO2level1'].resample('3H').mean().dropna()
sim_data_PAR = df_meteo_filter.loc[start:end, 'PAR'].resample('3H').mean().dropna()

# Series are given explicit names so merge() yields usable column labels.
# (Ts_data used to be resampled to 3H twice; the second pass changed nothing and is gone.)
Ts_data = Ts_C.loc[start:end].resample('3H').mean().rename('Ts_data').dropna()
LE_data = df_ET.loc[start:end, 'ET_VPD'].resample('3H').mean().dropna().rename('LE_data')

# One frame per panel: LE paired with one driver on the timestamps both have
df_tmp1 = LE_data.to_frame().merge(sim_data_VPD, how='inner', left_index=True, right_index=True)
df_tmp2 = LE_data.to_frame().merge(sim_data_CO2, how='inner', left_index=True, right_index=True)
df_tmp3 = LE_data.to_frame().merge(sim_data_PAR, how='inner', left_index=True, right_index=True)
df_tmp4 = LE_data.to_frame().merge(Ts_data, how='inner', left_index=True, right_index=True)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
# The stray double comma in the subtitle has been removed
fig.suptitle('Sensitivity of LE to VPD, CO2, PAR, and T_surface. \n (2001-2020 during growth seasons Apr-Sep, 3-hour averages)')

# Each scatter keeps its own handle (p1..p4) instead of overwriting p1
p1 = ax1.scatter(df_tmp1['VPD'], df_tmp1['LE_data'], marker='o', c=df_tmp1.index.hour, s=0.7, cmap='viridis')
ax1.set_xlim(-2, 40)
ax1.set_xlabel('VPD [hPa]')
ax1.set_ylabel(r'LE [$Wm^{-2}$]')
ax1.set_title('VPD')

p2 = ax2.scatter(df_tmp2['CO2level1'], df_tmp2['LE_data'], marker='o', c=df_tmp2.index.hour, s=0.7, cmap='viridis')
ax2.set_xlabel('CO2 [ppm]')
ax2.set_ylabel(r'LE [$Wm^{-2}$]')
ax2.set_title('CO2')

p3 = ax3.scatter(df_tmp3['PAR'], df_tmp3['LE_data'], marker='o', c=df_tmp3.index.hour, s=0.7, cmap='viridis')
ax3.set_xlabel(r'PAR [Wm-2]')
ax3.set_ylabel(r'LE [$Wm^{-2}$]')
ax3.set_title('PAR')

p4 = ax4.scatter(df_tmp4['Ts_data'], df_tmp4['LE_data'], marker='o', c=df_tmp4.index.hour, s=0.7, cmap='viridis')
ax4.set_xlabel(r'T_surface [deg C]')
ax4.set_ylabel(r'LE [$Wm^{-2}$]')
ax4.set_title('Ts')

fig.tight_layout()

# Free the right-hand strip of the figure for a shared hour-of-day colourbar
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(p1, cax=cbar_ax)
cbar_ax.set_ylabel('Hour')

In [None]:
# Sensitivity of simulated LE to each driver (3-hour means, daytime bins), with the
# st..ed sub-period overlaid as black crosses
start = '2008-05-01 00:00'
end = '2018-08-30 00:00'
st = '2018-05-01 00:00'
ed = '2018-08-30 00:00'
time_s = "9:00"
time_e = "18:00"

# 3-hourly means of each driver, limited to bins stamped between time_s and time_e.
# dropna() discards the bins that came out as NaN.
sim_data_VPD = df_Comb_filter.loc[start:end, 'VPD'].resample('3H').mean().between_time(time_s, time_e).dropna()
sim_data_CO2 = df_profile_filter.loc[start:end, 'CO2level1'].resample('3H').mean().between_time(time_s, time_e).dropna()
sim_data_PAR = df_meteo_filter.loc[start:end, 'PAR'].resample('3H').mean().between_time(time_s, time_e).dropna()

# Series are given explicit names so merge() yields usable column labels
Ts_data = Ts_C.loc[start:end].resample('3H').mean().between_time(time_s, time_e).rename('Ts_data').dropna()
LE_data = df_ET.loc[start:end, 'ET_VPD'].resample('3H').mean().between_time(time_s, time_e).dropna().rename('LE_data')

# One frame per panel: LE paired with one driver on the timestamps both have.
# The frames can differ in length because each driver has its own gaps.
df_tmp1 = LE_data.to_frame().merge(sim_data_VPD, how='inner', left_index=True, right_index=True)
df_tmp2 = LE_data.to_frame().merge(sim_data_CO2, how='inner', left_index=True, right_index=True)
df_tmp3 = LE_data.to_frame().merge(sim_data_PAR, how='inner', left_index=True, right_index=True)
df_tmp4 = LE_data.to_frame().merge(Ts_data, how='inner', left_index=True, right_index=True)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
fig.suptitle('Sensitivity of LE to VPD, CO2, PAR, and Ts. \n (2008-2017 during growth seasons May-Oct, 3-hour averages for 9:00-18:00)')

# Every panel takes x, y AND the colour from its own frame. Mixing in df_tmp1 for the
# colours or the LE values pairs up rows of different frames, or fails when lengths differ.
p1 = ax1.scatter(df_tmp1['VPD'], df_tmp1['LE_data'], marker='o', c=df_tmp1.index.hour, cmap='viridis')
p1a = ax1.scatter(df_tmp1.loc[st:ed, 'VPD'], df_tmp1.loc[st:ed, 'LE_data'], marker='x', c='black')
ax1.set_xlim(-2, 40)
ax1.set_xlabel('VPD [hPa]')
ax1.set_ylabel(r'LE [$Wm^{-2}$]')
ax1.set_title('VPD')

p2 = ax2.scatter(df_tmp2['CO2level1'], df_tmp2['LE_data'], marker='o', c=df_tmp2.index.hour, cmap='viridis')
p2a = ax2.scatter(df_tmp2.loc[st:ed, 'CO2level1'], df_tmp2.loc[st:ed, 'LE_data'], marker='x', c='black', label='2018')
ax2.set_xlabel('CO2 [ppm]')
ax2.set_ylabel(r'LE [$Wm^{-2}$]')
ax2.set_title('CO2')

p3 = ax3.scatter(df_tmp3['PAR'], df_tmp3['LE_data'], marker='o', c=df_tmp3.index.hour, cmap='viridis')
p3a = ax3.scatter(df_tmp3.loc[st:ed, 'PAR'], df_tmp3.loc[st:ed, 'LE_data'], marker='x', c='black')
ax3.set_xlabel(r'PAR [Wm-2]')
ax3.set_ylabel(r'LE [$Wm^{-2}$]')
ax3.set_title('PAR')

p4 = ax4.scatter(df_tmp4['Ts_data'], df_tmp4['LE_data'], marker='o', c=df_tmp4.index.hour, cmap='viridis')
p4a = ax4.scatter(df_tmp4.loc[st:ed, 'Ts_data'], df_tmp4.loc[st:ed, 'LE_data'], marker='x', c='black')
ax4.set_xlabel(r'Tsurface [deg C]')
ax4.set_ylabel(r'LE [$Wm^{-2}$]')
ax4.set_title('Ts')

fig.tight_layout()

# Only panel 2 carries a labelled series, so the legend goes there
ax2.legend()

# Free the right-hand strip of the figure for a shared hour-of-day colourbar
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(p1, cax=cbar_ax)
cbar_ax.set_ylabel('Hour')

## rs

In [None]:
# Sensitivity of simulated rs to each driver (3-hour means, bins stamped 9:00-18:00)
start = '2008-05-01 00:00'
end = '2018-08-30 00:00'

# 3-hourly means of each driver; dropna() discards the bins that came out as NaN
sim_data_VPD = df_Comb_filter.loc[start:end, 'VPD'].resample('3H').mean().between_time("9:00", "18:00").dropna()
sim_data_CO2 = df_profile_filter.loc[start:end, 'CO2level1'].resample('3H').mean().between_time("9:00", "18:00").dropna()
sim_data_PAR = df_meteo_filter.loc[start:end, 'PAR'].resample('3H').mean().between_time("9:00", "18:00").dropna()

# Model outputs are given explicit names so merge() yields usable column labels
Ts_data = Ts_C.loc[start:end].resample('3H').mean().between_time("9:00", "18:00").rename('Ts_data').dropna()
rs_data = rs.loc[start:end].resample('3H').mean().between_time("9:00", "18:00").dropna().rename('rs_data')

# One frame per panel: rs paired with one driver on the timestamps both have
df_tmp1 = rs_data.to_frame().merge(sim_data_VPD, how='inner', left_index=True, right_index=True)
df_tmp2 = rs_data.to_frame().merge(sim_data_CO2, how='inner', left_index=True, right_index=True)
df_tmp3 = rs_data.to_frame().merge(sim_data_PAR, how='inner', left_index=True, right_index=True)
df_tmp4 = rs_data.to_frame().merge(Ts_data, how='inner', left_index=True, right_index=True)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
fig.suptitle('Sensitivity of rs to VPD, CO2, PAR, and Ts. \n (2008-2017 during growth seasons May-Oct, 3-hour averages between 9:00-18:00)')

p1 = ax1.scatter(df_tmp1['VPD'], df_tmp1['rs_data'], marker='o', c=df_tmp1.index.hour, cmap='viridis')
ax1.set_xlim(-2, 40)
ax1.set_xlabel('VPD [hPa]')
# Unit corrected from W m-2 to s m-1 so it matches the other three rs panels
ax1.set_ylabel(r'rs [$sm^{-1}$]')
ax1.set_title('VPD')

p2 = ax2.scatter(df_tmp2['CO2level1'], df_tmp2['rs_data'], marker='o', c=df_tmp2.index.hour, cmap='viridis')
ax2.set_xlabel('CO2 [ppm]')
ax2.set_ylabel(r'rs [$sm^{-1}$]')
ax2.set_title('CO2')

# Each scatter keeps its own handle (p1..p4) instead of overwriting p1
p3 = ax3.scatter(df_tmp3['PAR'], df_tmp3['rs_data'], marker='o', c=df_tmp3.index.hour, cmap='viridis')
ax3.set_xlabel(r'PAR [Wm-2]')
ax3.set_ylabel(r'rs [$sm^{-1}$]')
ax3.set_title('PAR')

p4 = ax4.scatter(df_tmp4['Ts_data'], df_tmp4['rs_data'], marker='o', c=df_tmp4.index.hour, cmap='viridis')
ax4.set_xlabel(r'Tsurface [deg C]')
ax4.set_ylabel(r'rs [$sm^{-1}$]')
ax4.set_title('Ts')

fig.tight_layout()

# Free the right-hand strip of the figure for a shared hour-of-day colourbar
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(p1, cax=cbar_ax)
cbar_ax.set_ylabel('Hour')

In [None]:
# Sensitivity of simulated rs to each driver (3-hour means), with the st..ed
# sub-period overlaid as black crosses
start = '2008-05-01 00:00'
end = '2018-08-30 00:00'
st = '2018-05-01 00:00'
ed = '2018-08-30 00:00'

# 3-hourly means limited to bins stamped 9:00-15:00. With pandas' default left-labelled
# bins the 15:00 bin should hold data up to 18:00 -- confirm if the exact window matters.
# dropna() discards the bins that came out as NaN.
sim_data_VPD = df_Comb_filter.loc[start:end, 'VPD'].resample('3H').mean().between_time("9:00", "15:00").dropna()
sim_data_CO2 = df_profile_filter.loc[start:end, 'CO2level1'].resample('3H').mean().between_time("9:00", "15:00").dropna()
sim_data_PAR = df_meteo_filter.loc[start:end, 'PAR'].resample('3H').mean().between_time("9:00", "15:00").dropna()

# Model outputs are given explicit names so merge() yields usable column labels
Ts_data = Ts_C.loc[start:end].resample('3H').mean().between_time("9:00", "15:00").rename('Ts_data').dropna()
rs_data = rs.loc[start:end].resample('3H').mean().between_time("9:00", "15:00").dropna().rename('rs_data')

# One frame per panel: rs paired with one driver on the timestamps both have
df_tmp1 = rs_data.to_frame().merge(sim_data_VPD, how='inner', left_index=True, right_index=True)
df_tmp2 = rs_data.to_frame().merge(sim_data_CO2, how='inner', left_index=True, right_index=True)
df_tmp3 = rs_data.to_frame().merge(sim_data_PAR, how='inner', left_index=True, right_index=True)
df_tmp4 = rs_data.to_frame().merge(Ts_data, how='inner', left_index=True, right_index=True)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
fig.suptitle('Sensitivity of rs to VPD, CO2, PAR, and Ts. \n (2008-2017 during growth seasons May-Oct, 3-hour averages between 9:00-18:00)')

p1 = ax1.scatter(df_tmp1['VPD'], df_tmp1['rs_data'], marker='o', c=df_tmp1.index.hour, cmap='viridis')
p1a = ax1.scatter(df_tmp1.loc[st:ed, 'VPD'], df_tmp1.loc[st:ed, 'rs_data'], marker='x', c='black', label='2018')
ax1.set_xlim(-2, 40)
ax1.set_xlabel('VPD [hPa]')
# Unit corrected from W m-2 to s m-1 so it matches the other three rs panels
ax1.set_ylabel(r'rs [$sm^{-1}$]')
ax1.set_title('VPD')

p2 = ax2.scatter(df_tmp2['CO2level1'], df_tmp2['rs_data'], marker='o', c=df_tmp2.index.hour, cmap='viridis')
p2a = ax2.scatter(df_tmp2.loc[st:ed, 'CO2level1'], df_tmp2.loc[st:ed, 'rs_data'], marker='x', c='black')
ax2.set_xlabel('CO2 [ppm]')
ax2.set_ylabel(r'rs [$sm^{-1}$]')
ax2.set_title('CO2')

p3 = ax3.scatter(df_tmp3['PAR'], df_tmp3['rs_data'], marker='o', c=df_tmp3.index.hour, cmap='viridis')
p3a = ax3.scatter(df_tmp3.loc[st:ed, 'PAR'], df_tmp3.loc[st:ed, 'rs_data'], marker='x', c='black')
ax3.set_xlabel(r'PAR [Wm-2]')
ax3.set_ylabel(r'rs [$sm^{-1}$]')
ax3.set_title('PAR')

p4 = ax4.scatter(df_tmp4['Ts_data'], df_tmp4['rs_data'], marker='o', c=df_tmp4.index.hour, cmap='viridis')
# scatter() has no `size` keyword (passing it raises); the marker area is set with `s`.
# NOTE(review): s=0.1 keeps the value that was written, but makes these crosses far
# smaller than in the other three panels -- drop it if that was not intended.
p4a = ax4.scatter(df_tmp4.loc[st:ed, 'Ts_data'], df_tmp4.loc[st:ed, 'rs_data'], marker='x', s=0.1, c='black')
ax4.set_xlabel(r'Tsurface [deg C]')
ax4.set_ylabel(r'rs [$sm^{-1}$]')
ax4.set_title('Ts')

fig.tight_layout()

# Only panel 1 carries a labelled series, so the legend goes there
ax1.legend()

# Free the right-hand strip of the figure for a shared hour-of-day colourbar
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(p1, cax=cbar_ax)
cbar_ax.set_ylabel('Hour')

#### quick check R2 for different months

In [None]:
# Check the month-wise R2 between simulated An and measured GPP for selected years
for year in [2017, 2018]:
    for month in [4, 5, 6, 7, 8]:
        # The window runs from the 1st of the month to the 1st of the next month.
        # Months are zero-padded via the format spec and December rolls over into the
        # next year, so the loop also works if months >= 10 are added to the list.
        end_year, end_month = (year, month + 1) if month < 12 else (year + 1, 1)
        start_day = '{}-{:02d}-01'.format(year, month)
        end_day = '{}-{:02d}-01'.format(end_year, end_month)
        start = start_day + ' 00:00'
        end = end_day + ' 00:00'

        # 3-hourly means; only positive simulated An values are used
        sim_data = an_umol.loc[an_umol > 0].loc[start:end].resample('3H').mean().dropna()
        measured_data = df_Comb['GPP_f'].loc[start:end].resample('3H').mean().dropna()

        # Pair simulated and measured values on the timestamps both have
        df_tmp = pd.DataFrame()
        df_tmp['sim_data'] = sim_data
        df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

        # A regression needs at least two paired points; report the gap instead of crashing
        if len(df_tmp) < 2:
            print('{} -'.format(start_day), end_day, ' R2: n/a (fewer than two paired values)')
            continue

        # linregress(x, y): r_value is Pearson's coefficient, so R^2 is r_value**2
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])
        print('{} -'.format(start_day), end_day, ' R2: {:.3f}'.format(r_value**2))


### H2O flux

In [None]:
#def p_dailysum(df):
#    for idx,row in df.iterrows():
#        #calc precipitation sum from 00:00 to 23:59
#        start_day=np.datetime64(str(idx.date())+ ' 00:00')
#        end_day=np.datetime64(str(idx.date())+ ' 23:30')
#        df.loc[idx,'daily_prec']=df.loc[start_day:end_day ,'P(mast)'].sum() #iloc[row, column]
#return df

In [None]:
#old way, deprecated (takes long due to .iterrows() )
#df_ET_1=p_dailysum(df_ET)

In [None]:
# Add precipitation totals to a copy of df_ET so they can be used for filtering
df_ET_1 = df_ET.copy()

# Calendar-day precipitation totals, forward-filled back onto the 30-minute grid
df_tmp = df_ET_1.groupby(pd.Grouper(freq='D'))[['P(mast)']].sum()
df_ET_1['daily_prec'] = df_tmp.resample('30min').ffill()

# Trailing sums of precipitation over the last 3, 24, 48 and 72 hours
precip = df_ET_1['P(mast)']
df_ET_1['last3h_prec'] = precip.rolling('3H').sum()
df_ET_1['last24h_prec'] = precip.rolling('24H').sum()
df_ET_1['last2day_prec'] = precip.rolling('48H').sum()
df_ET_1['last3day_prec'] = precip.rolling('72H').sum()

# Quick visual check of the new columns over two weeks in April 2017
test_window = slice('2017-04-15 00:00', '2017-04-30 00:00')
df_ET_1.loc[test_window, 'daily_prec'].plot()
df_ET_1.loc[test_window, 'last3h_prec'].plot()
df_ET_1.loc[test_window, 'last24h_prec'].plot()
df_ET_1.loc[test_window, 'last3day_prec'].plot()

In [None]:
# Correlation of simulated vs. measured LE on 1-hour means, dry periods only
start = '2017-04-01 00:00'
end = '2017-07-30 00:00'

# Keep only records whose trailing 72-hour precipitation sum (last3day_prec) is zero.
# The columns are given as a list, the unambiguous form for selecting several labels.
sim_data = df_ET_1.loc[start:end, ['ET_VPD', 'last24h_prec', 'last3day_prec']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0]
sim_data = sim_data.resample('1H').mean()
sim_data = sim_data.dropna()

# Measured LE: positive values only, same window and averaging
measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('1H').mean()
measured_data = measured_data.dropna()

# Pair simulated and measured values on the timestamps both have
df_tmp = sim_data.copy()
df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y): r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], c=df_tmp.index.hour, cmap='viridis')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='r', label='slope')
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
# The period in the title is taken from start/end so it cannot drift from the data
# actually plotted (the fixed text said "April-Oct" for an April-July window).
fig.suptitle('Correlation of simulated to measured LE. {} to {}, (1hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(start[:10], end[:10], intercept, slope, r_value**2))
# The subtitle now describes the filter that is really applied (72-hour sum, not daily sum)
ax.set_title('(Only periods with zero precipitation in the preceding 72 h)')
ax.legend(loc='upper left')
cbar = plt.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Hour')

In [None]:
# Correlation of simulated vs. measured LE on 3-hour means, dry periods only,
# with the points coloured by the T_sfc_C column
start = '2017-04-14 00:00'
end = '2017-08-30 00:00'

# Keep only records whose trailing 72-hour precipitation sum (last3day_prec) is zero.
# The columns are given as a list, the unambiguous form for selecting several labels.
sim_data = df_ET_1.loc[start:end, ['ET_VPD', 'last3day_prec', 'T_sfc_C']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0.0]
sim_data = sim_data.resample('3H').mean()
sim_data = sim_data.dropna()

# Measured LE: positive values only, same window and averaging
measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('3H').mean()
measured_data = measured_data.dropna()

# Pair simulated and measured values on the timestamps both have
df_tmp = sim_data.copy()
df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y): r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], c=df_tmp['T_sfc_C'], cmap='viridis')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='r', label='slope')
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
# The period in the title is taken from start/end so it cannot drift from the data
# actually plotted (the fixed text said "April-Oct" for a mid-April to August window).
fig.suptitle('Correlation of simulated to measured LE. {} to {}, (3hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(start[:10], end[:10], intercept, slope, r_value**2))
# The subtitle now describes the filter that is really applied (72-hour sum, not daily sum)
ax.set_title('(Only periods with zero precipitation in the preceding 72 h)')
ax.legend(loc='upper left')
cbar = plt.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Temp [oC]')

#### quick check R2 for different months

In [None]:
# Check the month-wise slope and R2 between simulated and measured LE for selected years.
# Dry periods are selected with the rolling 72-hour precipitation sum (last3day_prec).
for year in [2017, 2018]:
    for month in [4, 5, 6, 7, 8]:
        # The window runs from the 1st of the month to the 1st of the next month.
        # Months are zero-padded via the format spec and December rolls over into the
        # next year, so the loop also works if months >= 10 are added to the list.
        end_year, end_month = (year, month + 1) if month < 12 else (year + 1, 1)
        start_day = '{}-{:02d}-01'.format(year, month)
        end_day = '{}-{:02d}-01'.format(end_year, end_month)
        start = start_day + ' 00:00'
        end = end_day + ' 00:00'

        # Simulated LE for records with no precipitation in the trailing 72 hours, 3-hour means.
        # The columns are given as a list, the unambiguous form for selecting several labels.
        sim_data = df_ET_1.loc[start:end, ['ET_VPD', 'last3day_prec']]
        sim_data = sim_data.loc[sim_data['last3day_prec'] == 0.0]
        sim_data = sim_data.resample('3H').mean()
        sim_data = sim_data.dropna()

        # Measured LE: positive values only, same window and averaging
        measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('3H').mean()
        measured_data = measured_data.dropna()

        # Pair simulated and measured values on the timestamps both have
        df_tmp = sim_data.copy()
        df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

        # A regression needs at least two paired points; report the gap instead of crashing
        if len(df_tmp) < 2:
            print('{} -'.format(start_day), end_day, 'Slope: n/a R2: n/a (fewer than two paired values)')
            continue

        # linregress(x, y): r_value is Pearson's coefficient, so R^2 is r_value**2
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
        print('{} -'.format(start_day), end_day, 'Slope:{:.3f} R2: {:.3f}'.format(slope, r_value**2))


## first draft sensitivity study

In [None]:
# Sensitivity run: air temperature.
# Re-runs the A-gs model with Tair shifted by `tair_offset` and compares the
# resulting simulated LE with the default run (df_ET_1), both regressed
# against measured LE.

# Prepare data for Run A-gs model
# The offset is defined once so that the legend and title always state the
# perturbation that was actually applied (they used to say 'Tair+1' while the
# code added 10).
# NOTE(review): confirm that +10 (and not +1) is the intended perturbation.
tair_offset=10
run2_label='Tair+{}'.format(tair_offset)
df_Comb_filter_1=df_Comb_filter.copy()
df_Comb_filter_1['Tair']=df_Comb_filter_1['Tair']+tair_offset

# Run A-gs model
fstr=1.0
an_run2,an_umol_run2,rs_run2, ra_run2  = runAgs(df_profile_filter,df_Comb_filter_1,df_meteo_filter,df_EC_filter,fstr)

#initialize df_ET
df_ET_run2 = pd.concat([df_meteo['L(o)'],df_meteo['Te-L(o)'],df_profile['Pressure'],df_Comb['VPD'],df_Comb['rH'],df_meteo['P(mast)']],axis=1,sort=False)
df_ET_run2['p_kPa']=df_ET_run2['Pressure']/10 # from hpa to kpa
df_ET_run2['VPD_adj']=df_ET_run2['VPD'].loc[df_ET_run2['VPD']>0] #some outlier values for VPD are negative, remove from dataset
df_ET_run2['VPD_adj']=df_ET_run2['VPD_adj']/10  # VPD from df_Comb is in hPa, I need kPa, so hPa/10 = kPa

df_ET_run2=calc_LE(df_ET_run2,rs_run2,ra_run2)
df_ET_run2['last3day_prec']=df_ET_run2['P(mast)'].rolling('72H').sum() # precipitation sum over the preceding 72 h

# plot
start='2017-04-01 00:00'
end='2017-07-30 00:00'
# The title used to claim "April-Oct 2017"; derive the period from the window instead
period_label='{} to {}'.format(start[:10],end[:10])

# Default run: rows with zero 72 h precipitation only, hourly means
sim_data = df_ET_1.loc[start:end,('ET_VPD','last3day_prec')]
sim_data = sim_data.loc[sim_data['last3day_prec']==0]
sim_data= sim_data.resample('1H').mean()
sim_data=sim_data.dropna()

# Perturbed run: same filtering
sim2_data = df_ET_run2.loc[start:end,('ET_VPD','last3day_prec')]
sim2_data = sim2_data.loc[sim2_data['last3day_prec']==0]
sim2_data= sim2_data.resample('1H').mean()
sim2_data=sim2_data.dropna()

# Measured LE: positive values only, hourly means
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('1H').mean()
measured_data=measured_data.dropna()

# Pair each simulation with the measurements on common timestamps
df_tmp=sim_data.copy()
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

df_tmp2=sim2_data.copy()
df_tmp2=df_tmp2.merge(measured_data, how='inner',left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R2 = r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(df_tmp2['LE'], df_tmp2['ET_VPD'])
print('R2: ',r_value2**2)
print('slope, intercept:', slope2, intercept2)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['LE'],df_tmp['ET_VPD'],marker='o') #scatter(x,y)
p2=ax.scatter(df_tmp2['LE'],df_tmp2['ET_VPD'],marker='^') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='black',label='default')
ax.axline((0.0,intercept2),slope=slope2,c='black',linestyle='dashed' ,label=run2_label)
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
ax.set_ylim(-10,175)
ax.set_xlim(-10,175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle('Correlation simulated-measured LE. {}, (1hour mean) \n default slope = {:.3f}, default R2 = {:.3f} \n {} slope = {:.3f}, {} R2 = {:.3f}'.format(period_label,slope,r_value**2,run2_label,slope2,run2_label,r_value2**2))
ax.legend(loc='upper left')



In [None]:
# Sensitivity run: PAR scaled by `par_factor`.
# Re-runs the A-gs model with scaled PAR and compares the resulting simulated
# LE with the default run (df_ET_1), both regressed against measured LE.

# Prepare data for Run A-gs model
par_factor=0.9
run2_label='PAR*{}'.format(par_factor)  # used in legend and title (they used to say 'Tair+1')
df_meteo_filter_1=df_meteo_filter.copy()
df_meteo_filter_1['PAR']=df_meteo_filter['PAR']*par_factor

# Run A-gs model
fstr=1.0
an_run2,an_umol_run2,rs_run2, ra_run2  = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter_1,df_EC_filter,fstr)

# Compute LE for the perturbed run.
# Fix: use the resistances of THIS run (rs_run2/ra_run2). The previous call
# passed the default-run rs/ra, so the "sensitivity" result was just the
# default run again. A copy of df_ET is passed so the shared input frame
# cannot be modified by calc_LE.
df_ET_run2=calc_LE(df_ET.copy(),rs_run2,ra_run2)
df_ET_run2['last3day_prec']=df_ET_run2['P(mast)'].rolling('72H').sum() # precipitation sum over the preceding 72 h

# plot
start='2017-04-01 00:00'
end='2017-07-30 00:00'
# The title used to claim "April-Oct 2017"; derive the period from the window instead
period_label='{} to {}'.format(start[:10],end[:10])

# Default run: rows with zero 72 h precipitation only, hourly means
sim_data = df_ET_1.loc[start:end,('ET_VPD','last3day_prec')]
sim_data = sim_data.loc[sim_data['last3day_prec']==0]
sim_data = sim_data.resample('1H').mean()
sim_data = sim_data.dropna()

# Perturbed run: same filtering
sim2_data = df_ET_run2.loc[start:end,('ET_VPD','last3day_prec')]
sim2_data = sim2_data.loc[sim2_data['last3day_prec']==0]
sim2_data = sim2_data.resample('1H').mean()
sim2_data = sim2_data.dropna()

# Measured LE: positive values only, hourly means
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('1H').mean()
measured_data=measured_data.dropna()

# Pair each simulation with the measurements on common timestamps
df_tmp=sim_data.copy()
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

df_tmp2=sim2_data.copy()
df_tmp2=df_tmp2.merge(measured_data, how='inner',left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R2 = r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(df_tmp2['LE'], df_tmp2['ET_VPD'])
print('R2: ',r_value2**2)
print('slope, intercept:', slope2, intercept2)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['LE'],df_tmp['ET_VPD'],marker='o') #scatter(x,y)
p2=ax.scatter(df_tmp2['LE'],df_tmp2['ET_VPD'],marker='^') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='black',label='default')
ax.axline((0.0,intercept2),slope=slope2,c='black',linestyle='dashed' ,label=run2_label)
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
ax.set_ylim(-10,175)
ax.set_xlim(-10,175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle('Correlation simulated-measured LE. {}, (1hour mean) \n default slope = {:.3f}, default R2 = {:.3f} \n {} slope = {:.3f}, {} R2 = {:.3f}'.format(period_label,slope,r_value**2,run2_label,slope2,run2_label,r_value2**2))
ax.legend(loc='upper left')



In [None]:
# Sensitivity run: PAR scaled by `par_factor`.
# Re-runs the A-gs model with scaled PAR and regresses the resulting
# simulated LE against measured LE.

# Prepare data for Run A-gs model
# Fix: start from a fresh copy of the unperturbed meteo data. The previous
# self-assignment (df_meteo_filter_1=df_meteo_filter_1) was a no-op, so the
# factor was applied on top of whatever an earlier cell had left in
# df_meteo_filter_1 and compounded on every re-run of this cell.
par_factor=1.1
run_label='PAR*{}'.format(par_factor)
df_meteo_filter_1=df_meteo_filter.copy()
df_meteo_filter_1['PAR']=df_meteo_filter['PAR']*par_factor

# Run A-gs model
fstr=1.0
an_run2,an_umol_run2,rs_run2, ra_run2  = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter_1,df_EC_filter,fstr)

# Fix: compute LE from the resistances of THIS run (rs_run2/ra_run2) instead
# of the default-run rs/ra, and pass a copy so the shared df_ET frame cannot
# be modified by calc_LE.
df_ET_run3=calc_LE(df_ET.copy(),rs_run2,ra_run2)
df_ET_run3['last3day_prec']=df_ET_run3['P(mast)'].rolling('72H').sum() # precipitation sum over the preceding 72 h

# plot
start='2017-04-01 00:00'
end='2017-07-30 00:00'
# The title used to claim "April-Oct 2017"; derive the period from the window instead
period_label='{} to {}'.format(start[:10],end[:10])

# Perturbed run: rows with zero 72 h precipitation only, hourly means
sim_data = df_ET_run3.loc[start:end,('ET_VPD','last3day_prec')]
sim_data = sim_data.loc[sim_data['last3day_prec']==0]
sim_data= sim_data.resample('1H').mean()
sim_data=sim_data.dropna()

# Measured LE: positive values only, hourly means
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('1H').mean()
measured_data=measured_data.dropna()

# Pair simulation and measurements on common timestamps
df_tmp=sim_data.copy()
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R2 = r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['LE'],df_tmp['ET_VPD'],c=df_tmp.index.hour,cmap='viridis') #scatter(x,y), coloured by hour of day
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
ax.set_ylim(-10,175)
ax.set_xlim(-10,175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
# Title now matches the code: 1-hour means (not 3-hour) of the perturbed run
fig.suptitle('Correlation of simulated ({}) to measured LE. {}, (1hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(run_label,period_label,intercept,slope,r_value**2))
# The filter is the 72 h rolling precipitation sum, not a daily sum
ax.set_title('(Only periods with zero precipitation in the preceding 72 h)')
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Hour')

### WIP: hourly averages (binned by hour of day)

In [None]:
# Mean diurnal cycle of assimilation: simulated An (an_umol) versus measured
# GPP_f, binned by hour of day, with the inter-quartile range shaded.
start='2017-04-01 00:00'
end='2017-08-30 00:00'

# Simulated An: positive values only, restricted to the window, hourly means
positive_an=an_umol.loc[an_umol>0]
sim_data=positive_an.loc[start:end].resample('1H').mean()
sim_data=sim_data.dropna() # drop empty hourly bins left by the filter

# Measured GPP over the same window, hourly means
gpp_window=df_Comb['GPP_f'].loc[start:end]
measured_data=gpp_window.resample('1H').mean().dropna()

# Pair both series on common timestamps; the simulated series ends up in
# column 0, the measured one in column 'GPP_f'
df_tmp=pd.DataFrame(sim_data).merge(measured_data, how='inner',left_index=True, right_index=True)

hours_list=list(range(24))

# One sub-frame per hour of day, reused for every statistic below
rows_by_hour=[df_tmp.loc[df_tmp.index.hour==hr] for hr in hours_list]

d={
    'meas_avg': [rows['GPP_f'].mean() for rows in rows_by_hour],
    'meas_q1': [rows['GPP_f'].quantile(q=0.25) for rows in rows_by_hour],
    'meas_q3': [rows['GPP_f'].quantile(q=0.75) for rows in rows_by_hour],
    'sim_avg': [rows[0].mean() for rows in rows_by_hour],
    'sim_q1': [rows[0].quantile(q=0.25) for rows in rows_by_hour],
    'sim_q3': [rows[0].quantile(q=0.75) for rows in rows_by_hour],
}

df_avg=pd.DataFrame(d)

# Hourly means as lines, quartile ranges as shaded bands
fig,ax = plt.subplots()
p1=ax.plot(df_avg['meas_avg'],marker='o',label='measured')
p2=ax.plot(df_avg['sim_avg'],marker='s',label='simulated')
for low_col,high_col in (('meas_q1','meas_q3'),('sim_q1','sim_q3')):
    ax.fill_between(hours_list,df_avg[low_col],df_avg[high_col],alpha=0.1)

plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'Assimilation [$\mu molm^{-2}s^{-1}$]')
fig.suptitle('Simulated and measured An by hour, Apr-Oct 2017, (1hour mean)')
ax.legend(loc='upper left')


In [None]:
# Mean diurnal cycle of latent heat flux: simulated LE ('ET_VPD' of df_ET_1)
# versus measured LE, binned by hour of day, with the inter-quartile range shaded.
start='2017-04-01 00:00'
end='2017-08-30 00:00'

# Simulated LE: keep rows whose 'last3day_prec' is exactly zero, then hourly means
sim_data = df_ET_1.loc[start:end,('ET_VPD','last24h_prec','last3day_prec')]
sim_data = sim_data.loc[sim_data['last3day_prec']==0.0]
print(sim_data.size) # number of values (rows x columns) left after the dry filter
sim_data=sim_data.resample('1H').mean()
sim_data=sim_data.dropna() # drop empty hourly bins left by the filter

# Measured LE: positive values only, hourly means
measured_data=df_Comb.loc[start:end,'LE']
measured_data=measured_data.loc[measured_data>0]
measured_data=measured_data.resample('1H').mean()
measured_data=measured_data.dropna()

# Pair simulation and measurements on common timestamps
df_tmp=pd.DataFrame(sim_data)
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

hours_list=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]

# Per-hour mean and quartiles for measured ('LE') and simulated ('ET_VPD') values
d={}
d['meas_avg'] = [df_tmp.loc[df_tmp.index.hour==i,'LE'].mean() for i in hours_list]
d['meas_q1'] = [df_tmp.loc[df_tmp.index.hour==i,'LE'].quantile(q=0.25) for i in hours_list]
d['meas_q3'] = [df_tmp.loc[df_tmp.index.hour==i,'LE'].quantile(q=0.75) for i in hours_list]
d['sim_avg'] = [df_tmp.loc[df_tmp.index.hour==i,'ET_VPD'].mean() for i in hours_list]
d['sim_q1'] = [df_tmp.loc[df_tmp.index.hour==i,'ET_VPD'].quantile(q=0.25) for i in hours_list]
d['sim_q3'] = [df_tmp.loc[df_tmp.index.hour==i,'ET_VPD'].quantile(q=0.75) for i in hours_list]

df_avg=pd.DataFrame(d)

fig,ax = plt.subplots()
p1=ax.plot(df_avg['meas_avg'],marker='o',label='measured')
p2=ax.plot(df_avg['sim_avg'],marker='s',label='simulated')
ax.fill_between(hours_list,df_avg['meas_q1'],df_avg['meas_q3'],alpha=0.1)
ax.fill_between(hours_list,df_avg['sim_q1'],df_avg['sim_q3'],alpha=0.1)

#ax.set_ylim(-5,40)
#ax.set_xlim(-5,40)
plt.xticks(hours_list) 
ax.set_xlabel('hour')
# Fix: LE is a flux density in W m-2 (as on the other LE plots in this
# notebook); the previous label carried a spurious s^-1.
ax.set_ylabel(r'LE [$Wm^{-2}$]')
fig.suptitle('Simulated and measured LE by hour, Apr-Oct 2017, (1hour mean)')
ax.legend(loc='upper left')


In [None]:
# Prepare the inputs for deriving stomatal conductance from measurements:
# collect the measured variables in df_gs, compute vapour pressure, specific
# humidity and cp, and turn the resistances ra/rs into conductances.
# NOTE: start/end are assigned here but not used anywhere in this cell.
start='2017-04-14 00:00'
end='2017-04-30 00:00'

# Wrap the resistances (ra, rs defined elsewhere in the notebook) in DataFrames
df_ra=pd.DataFrame({'ra':ra})
df_rs=pd.DataFrame({'rs':rs})

# Side-by-side frame of the measured inputs, aligned on the index
df_gs=pd.concat([df_profile['Pressure'],df_Comb['LE'],df_Comb['Tair'],df_Comb['VPD'],df_meteo['P(mast)'],df_meteo['R(net)'],df_meteo['G1']],axis=1,sort=False)
df_gs['Tair_K']=df_gs['Tair']+273.15
df_gs['p_kPa']=df_gs['Pressure']/10 # presumably hPa -> kPa, as in the other cells; confirm the unit of 'Pressure'
# The next three assignments are outlier filters: the right-hand side keeps
# only the rows passing the condition, and the index-aligned assignment
# leaves NaN in all other rows.
df_gs['p_kPa']=df_gs['p_kPa'].loc[df_gs['p_kPa']>1]
df_gs['LE']=df_gs['LE'].loc[df_gs['LE']>-5]
df_gs['VPD_adj']=df_gs['VPD'].loc[df_gs['VPD']>0] # some outlier values for VPD are negative, remove them from the dataset
df_gs['VPD_adj']=df_gs['VPD_adj']/10  # VPD from df_Comb is in hPa, kPa is needed: hPa/10 = kPa

# Calculate specific humidity q and heat capacity cp

    # Constants of the empirical saturation vapour pressure formula (Magnus/Tetens form):
e_sat_0 = 0.6107 # kPa (= 610.7 Pa)
a = 7.5
b = 237.3 # degrees C (not a typo)
df_gs['e_sat'] = e_sat_0 * 10**(a*df_gs['Tair'] / (b+df_gs['Tair'])) # uses 'Tair' directly, i.e. expects degrees C; result in kPa
Rd = 287 # J/kg K
Rv = 462 # J/kg K
df_gs['q_sat'] = Rd/Rv * df_gs['e_sat']/df_gs['p_kPa'] # q = Rd/Rv * e/p -> q_sat = Rd/Rv * e_sat/p
df_gs['e_act'] = df_gs['e_sat'] - df_gs['VPD_adj'] # VPD = e_sat - e_act -> e_act = e_sat - VPD
df_gs['q_act'] = Rd/Rv * df_gs['e_act']/df_gs['p_kPa'] # e and p are both in kPa, so q is a dimensionless ratio (kg/kg, order of magnitude 0.005-0.015), not g/kg
# Now calculate cp from q
cpd=1004.67 #J/kg/K
df_gs['cp']=cpd*(1+0.84*df_gs['q_act']) # q_act as computed above is in kg/kg

# Aerodynamic conductance: inverse of the aerodynamic resistance
df_ra['ga'] = 1. / df_ra['ra']
# Stomatal conductance: inverse of the stomatal resistance
df_rs['gs'] = 1. / df_rs['rs']

# Disabled draft of a separate calc_LE run, kept for reference:
#df_ET_runGs = pd.concat([df_meteo['L(o)'],df_meteo['Te-L(o)'],df_profile['Pressure'],df_Comb['VPD'],df_Comb['rH'],df_meteo['P(mast)']],axis=1,sort=False)
#df_ET_runGs['p_kPa']=df_ET_runGs['Pressure']/10 # from hpa to kpa
#df_ET_runGs['VPD_adj']=df_ET_runGs['VPD'].loc[df_ET_runGs['VPD']>0] #some outlier values for VPD are negative, remove from dataset
#df_ET_runGs['VPD_adj']=df_ET_runGs['VPD_adj']/10  # VPD from df_Comb is in hPa, I need kPa, so hPa/10 = kPa

#df_ET_runGs=calc_LE(df_ET_runGs,rs_runGs,ra_runGs)
#df_ET_runGs['last3day_prec']=df_ET_runGs['P(mast)'].rolling('72H').sum()

#sim_data = df_ET_1.loc[start:end,('ET_VPD','last24h_prec', 'T_sfc_C')]
#measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")

#rs.plot()

In [None]:
# Stomatal conductance gs derived from measurements by inverting the
# Penman-Monteith equation, for comparison with the gs simulated by A-gs
# (like in the research paper by Eline Floor Houwen):
#   gs = LE*gamma*ga / ( delta*(Rnet-G) + rho*cp*ga*VPD - LE*(delta+gamma) )

# 3-hourly means, daytime window only
df_gs=df_gs.resample('3H').mean()
df_gs=df_gs.between_time('9:00','15:00')

rho=1.2 # air density, presumably in kg/m3 -- TODO confirm
cp=1006  # J/kg/K, approximation (only used for gamma below)
# Fix: Lv must be in J/kg to match cp in J/kg/K. With Lv=2.26 (MJ/kg) gamma
# came out ~1e6 times too large compared with delta, which is in kPa/K.
Lv=2.26e6 # J/kg

# Psychrometric constant in kPa/K: cp [J/kg/K] * p [kPa] / (0.622 * Lv [J/kg])
df_gs['gamma'] = (cp * df_gs['p_kPa']) / (0.622 * Lv)

# Slope of the saturation vapour pressure curve in kPa/K, Tair in degrees C
df_gs['delta'] = 4098 * (0.6108 * np.exp((17.27*df_gs['Tair'])/(df_gs['Tair']+237.3))) / ((df_gs['Tair']+237.3)**2)

LE=df_gs['LE']
gamma=df_gs['gamma']
# NOTE(review): df_ra['ga'] is not resampled like df_gs, so the index-aligned
# arithmetic below picks the ga value at each 3-hourly timestamp rather than
# a 3-hour mean -- confirm this is intended.
ga=df_ra['ga']
delta=df_gs['delta']
Rnet=df_gs['R(net)']
G=df_gs['G1']
cp=df_gs['cp'] # from here on use the humidity-dependent cp column of df_gs
VPD=df_gs['VPD_adj']

# Numerator and the three denominator terms of the inverted equation
first_term = LE*gamma*ga
second_term=delta*(Rnet-G)
third_term=rho*cp*ga*VPD
fourth_term=LE*(delta+gamma)

df_gs['gs'] = (first_term) / ( (second_term) + (third_term) - (fourth_term) )

df_gs['gs'].plot()
# Uncomment to inspect the individual terms:
#first_term.plot()
#second_term.plot()
#third_term.plot()
#fourth_term.plot()
#((second_term) + (third_term)).plot()


In [None]:
# Plot the stomatal conductance column 'gs' of df_rs (the inverse of rs)
df_rs['gs'].plot()

In [None]:
# Quick sanity check of the moist-air heat capacity for a single humidity value.
# NOTE: this rebinds the global name `cp` to a scalar.
cpd=1004.67 #J/kg/K
q=0.005 # same order of magnitude as a specific humidity in kg/kg
cp=cpd*(1+0.84*q) # NOTE(review): with q=0.005 this treats q as kg/kg, not g/kg
cp

In [None]:
# Echo the object named `df` as cell output.
# NOTE(review): no `df` is defined in the visible part of this notebook -- confirm it exists.
df

In [None]:
# Plot the full 'R(net)' series of df_meteo; alternative quantities are kept disabled below
df_meteo['R(net)'].plot()
#df_meteo['G1'].plot()
#(df_meteo['R(net)']-df_meteo['G1']).plot()

#df_meteo['-q'].plot()

In [None]:
# The five 'SM-*' columns of df_soil for 2017 (1 Jan - 30 Dec), one plot call per column
df_soil.loc['2017-01-01':'2017-12-30','SM-Lit'].plot()
df_soil.loc['2017-01-01':'2017-12-30','SM-003'].plot()
df_soil.loc['2017-01-01':'2017-12-30','SM-020'].plot()
df_soil.loc['2017-01-01':'2017-12-30','SM-050'].plot()
df_soil.loc['2017-01-01':'2017-12-30','SM-100'].plot()

In [None]:
# The same five 'SM-*' columns of df_soil, zoomed in on 17-22 April 2017
df_soil.loc['2017-04-17':'2017-04-22','SM-Lit'].plot()
df_soil.loc['2017-04-17':'2017-04-22','SM-003'].plot()
df_soil.loc['2017-04-17':'2017-04-22','SM-020'].plot()
df_soil.loc['2017-04-17':'2017-04-22','SM-050'].plot()
df_soil.loc['2017-04-17':'2017-04-22','SM-100'].plot()