# A-gs model and implementation (simulation CO2 and H2O flux)

## Initialize data and model

### Setup and fetch data

In [None]:
# Settings
Username = 'Beheerder'
# Years to download. range() excludes its end value, so this covers 2008
# up to and including 2017. Alternative span noted by the author: (1997,2021).
years = range(2008, 2018)

In [None]:
import os
# Base folder, relative to the notebook, that is handed to the Read_* loaders
# and that contains the 'PythonScripts' folder added to sys.path below.
datapath   = os.path.join('../')
print('datapath is set to %s'%datapath)

# !pip install numpy
# !pip install pandas
# !pip install matplotlib
# !pip install plotly 
# !pip install cufflinks
#!pip install colorspacious
#!pip install seaborn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import plotly.express as px
#import cufflinks as cf
import matplotlib.dates as mdate
import matplotlib.ticker as ticker
from matplotlib import cm
#from colorspacious import cspace_converter
import scipy.stats as stats
#cf.go_offline()
# cf.set_config_file(offline=False, world_readable=True)

from datetime import datetime, timedelta
import sys
sys.path.insert(0, os.path.join(datapath,'PythonScripts'))
from Loobos_Toolbox import dateparse, dateparse_Gapfilled, Read_LoobosEddFinal, Read_LooStor, Read_LoodatGapfill, Read_Loobos_halfhourly, Read_Loobos_meteo, Read_Loobos_soil, Read_Loobos_profile

from Ags_model import runAgs, calc_LE

In [None]:
# The two `if` guards prevent the data from being re-read when this cell is
# executed again. To force a re-load, comment the guards out instead.
if 'progress' not in globals():
    progress = []
if 'dataloaded' not in progress:
    # Read files
    df_EC = Read_LoobosEddFinal(years, datapath)
    df_Stor = Read_LooStor(years, datapath)
    df_Comb = Read_LoodatGapfill(years, datapath)
    df_NEE = Read_Loobos_halfhourly(years, datapath)
    df_meteo = Read_Loobos_meteo(years, datapath)
    df_soil = Read_Loobos_soil(years, datapath)
    df_profile = Read_Loobos_profile(years, datapath)
    # Remember that loading succeeded so the next run skips this branch
    progress.append('dataloaded')

In [None]:
# Build boolean masks that keep only original (not gap-filled) GPP records
# with usable driver data, then apply the combined mask to every input frame.

# General filter: GPP quality flag equal to 0 and PAR above zero
gpp_not_gapfilled = df_Comb['GPP_fqc'] == 0
par_positive = df_meteo['PAR'] > 0
I = gpp_not_gapfilled & par_positive

# Filter for CO2 data
CO2 = df_profile['CO2level1'] > 300

# Filter for L(o)corr data
Locorr = df_meteo['L(o)corr'] > 0

# Filter for VPD data
VPD = df_Comb['VPD'] >= 0

# Filter for U-star
Ustar = df_EC['U-star'] >= 0

# Combine all filters (note: this name shadows the builtin `filter`)
filter = I & CO2 & Locorr & VPD & Ustar

# Apply the same combined mask to every frame:
#   df_profile -> column 'CO2'
#   df_meteo   -> columns 'L(o)corr' and 'PAR'
#   df_Comb    -> columns 'VPD' and 'Tair'
#   df_EC      -> columns 'Mea_Windsp' and 'U-star'
df_profile_filter, df_meteo_filter, df_Comb_filter, df_EC_filter = (
    frame[filter] for frame in (df_profile, df_meteo, df_Comb, df_EC)
)

### Run A-gs model

In [None]:
# Run the A-gs model once per fstr value on the same filtered inputs.
# Each call also returns `ra`; the name is rebound every time, so after this
# cell `ra` holds the value returned by the last (fstr=0.2) call.
ags_inputs = (df_profile_filter, df_Comb_filter, df_meteo_filter, df_EC_filter)

an_final_10, an_umol_10, rs_10, ra = runAgs(*ags_inputs, fstr=1.0)
an_final_8, an_umol_8, rs_8, ra = runAgs(*ags_inputs, fstr=0.8)
an_final_6, an_umol_6, rs_6, ra = runAgs(*ags_inputs, fstr=0.6)
an_final_4, an_umol_4, rs_4, ra = runAgs(*ags_inputs, fstr=0.4)
an_final_2, an_umol_2, rs_2, ra = runAgs(*ags_inputs, fstr=0.2)


In [None]:
# Monthly means of simulated assimilation for each fstr run, plus the
# measured GPP from the filtered data set.
GPP_10 = an_umol_10.resample("M").mean()
GPP_8 = an_umol_8.resample("M").mean()
GPP_6 = an_umol_6.resample("M").mean()
GPP_4 = an_umol_4.resample("M").mean()
GPP_2 = an_umol_2.resample("M").mean()
GPP_meas = df_Comb_filter['GPP_f'].resample("M").mean()

# The first curve also sets the title and y-label of the shared axes
GPP_10.plot(label="fstr=1.0", legend='yes', title="Monthly average GPP model output", ylabel="umolm-2s-1")
for monthly_gpp, fstr_label in (
    (GPP_8, "fstr=0.8"),
    (GPP_6, "fstr=0.6"),
    (GPP_4, "fstr=0.4"),
    (GPP_2, "fstr=0.2"),
):
    monthly_gpp.plot(label=fstr_label, legend='yes')
GPP_meas.plot(c='black', linestyle='dashed', label='measured', legend='yes')

In [None]:
# Monthly mean of the rs output of every A-gs run.
# The fstr=0.4 run (rs_4) was missing from this plot although it is computed
# and used everywhere else; it is included here, and each curve is labelled
# so the runs can be told apart.
for rs_run, fstr_label in (
    (rs_10, "fstr=1.0"),
    (rs_8, "fstr=0.8"),
    (rs_6, "fstr=0.6"),
    (rs_4, "fstr=0.4"),
    (rs_2, "fstr=0.2"),
):
    rs_run.resample("M").mean().plot(label=fstr_label, legend=True)

## Calculate ET

### Assemble dataframe 'df_ET' that will hold output and fill with inputs

In [None]:
def init_ETframe(rs_series, ra_series=None):
    """Assemble the input frame for the latent-heat (LE) calculation.

    The frame combines columns from the notebook-level frames `df_meteo`
    ('L(o)', 'Te-L(o)', 'P(mast)'), `df_profile` ('Pressure') and `df_Comb`
    ('VPD', 'rH'), and adds the derived columns 'p_kPa' and 'VPD_adj' plus
    the resistances 'rs' and 'ra'.

    Parameters
    ----------
    rs_series : pandas.Series
        rs output of a `runAgs` call; aligned on the frame's index.
    ra_series : pandas.Series, optional
        ra output of a `runAgs` call. When omitted, the notebook-level
        variable `ra` is used, which is what this function always did before
        the parameter existed.

    Returns
    -------
    pandas.DataFrame
        The assembled frame; rows without a matching rs/ra value hold NaN in
        those columns.
    """
    if ra_series is None:
        # Backward-compatible fallback to the global set by the runAgs cell
        ra_series = ra

    df_ET = pd.concat(
        [
            df_meteo['L(o)'],
            df_meteo['Te-L(o)'],
            df_profile['Pressure'],
            df_Comb['VPD'],
            df_Comb['rH'],
            df_meteo['P(mast)'],
        ],
        axis=1,
        sort=False,
    )
    # Convert pressure from hPa to kPa
    df_ET['p_kPa'] = df_ET['Pressure'] / 10
    # Some outlier VPD values are negative: keep only values > 0 (others
    # become NaN) and convert from hPa to kPa
    df_ET['VPD_adj'] = df_ET['VPD'].where(df_ET['VPD'] > 0) / 10
    # Series are aligned on the index when assigned as columns
    df_ET['rs'] = rs_series
    df_ET['ra'] = ra_series
    return df_ET

In [None]:
# One LE input frame per fstr run, built from that run's rs output
df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2 = (
    init_ETframe(rs_run) for rs_run in (rs_10, rs_8, rs_6, rs_4, rs_2)
)

In [None]:
# Run the LE calculation on every frame and rebind each name to the result
df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2 = map(
    calc_LE, (df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2)
)

In [None]:
#df_ET_10['L(o)_corr'].resample('M').mean().plot(label='mine',legend='y')
#df_meteo_filter['L(o)corr'].resample('M').mean().plot(label='dataset',legend='y')

In [None]:
# Monthly means of the filtered measurements, using only records whose
# measured LE is non-negative
non_negative_LE = df_Comb_filter['LE'] >= 0
df_ET_meas = df_Comb_filter.copy().loc[non_negative_LE].resample("M").mean()

In [None]:
# Monthly mean simulated LE ('ET_VPD') per fstr run against the measured LE.
# The first curve also sets the title and y-label of the shared axes.
LE_monthly_10 = df_ET_10['ET_VPD'].resample('M').mean()
LE_monthly_10.plot(label="fstr=1.0", legend='yes', title="Monthly average LE model output", ylabel="Wm-2")
for df_run, fstr_label in (
    (df_ET_8, "fstr=0.8"),
    (df_ET_6, "fstr=0.6"),
    (df_ET_4, "fstr=0.4"),
    (df_ET_2, "fstr=0.2"),
):
    df_run['ET_VPD'].resample('M').mean().plot(label=fstr_label, legend='yes')
df_ET_meas['LE'].resample('M').mean().plot(c='black', linestyle='dashed', label='measured', legend='yes')

### calculate ET

In [None]:
#df_ET_10=calc_LE(df_ET,rs_10,ra_10)
#df_ET_8=calc_LE(df_ET,rs_8,ra_8)
#df_ET_6=calc_LE(df_ET,rs_6,ra_6) 
#df_ET_4=calc_LE(df_ET,rs_4,ra_4)
#df_ET_2=calc_LE(df_ET,rs_2,ra_2)

In [None]:
#df_ET_10['ET_VPD'].resample("M").mean().plot()
#measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].resample('M').mean()
#measured_data.plot(legend="measured")

#I_filter = ((df_Comb['GPP_fqc']==0)&(df_Comb['LE']>0)&(df_Comb['VPD'] > 0))

#VPD_filter = (df_Comb['VPD'] > 0)
#df_test=df_Comb[I_filter]

#df_test['LE'].resample('M').mean().plot()
#df_Comb['LE'].resample('M').mean().plot()

## Correlation plots

### CO2 flux

In [None]:
# Select the simulation whose CO2 flux is analysed in the cells below:
# an_umol_8 is the output of the runAgs call with fstr=0.8.
an_umol = an_umol_8

In [None]:
# Correlation between simulated (A-gs) and measured (GPP_f) CO2 flux.
#start='2017-05-01 00:00'
#end='2017-05-30 00:00'
start = '2008-05-01 00:00'
end = '2018-09-30 00:00'

# 3-hourly means of the positive simulated values; dropna() removes the bins
# that are empty after the selection.
sim_data = an_umol.loc[an_umol > 0].loc[start:end].resample('3H').mean()  #.between_time("11:00", "18:00")
sim_data = sim_data.dropna()
measured_data = df_Comb['GPP_f'].loc[start:end].resample('3H').mean()  #.between_time("11:00", "18:00")
measured_data = measured_data.dropna()

# Keep only the time steps present in both series
df_tmp = sim_data.to_frame(name='sim_data')
df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['GPP_f'], df_tmp['sim_data'], c=df_tmp.index.hour, cmap='viridis')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='r', label='slope')
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-5, 40)
ax.set_xlim(-5, 40)
ax.set_xlabel(r'measured data An (GPP_f) [$\mu molm^{-2}s^{-1}$]')
ax.set_ylabel(r'simulated data An (A-gs) [$\mu molm^{-2}s^{-1}$]')
# The period in the title is taken from start/end; it used to be hard-coded as
# "May 2017", which did not match the selected range.
fig.suptitle(
    'Correlation of simulated to measured An, {} to {}, (3hour mean, 00:00-23:59) \n'
    ' intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(start[:10], end[:10], intercept, slope, r_value**2)
)
ax.legend(loc='upper left')
cbar = plt.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Hour')

#### quick check R2 for different months

In [None]:
# Check the month-wise R2 of simulated vs. measured CO2 flux for some years.
# NOTE: with the default `years = range(2008,2018)` there is no 2018 data, so
# months without enough data are reported and skipped instead of letting
# linregress raise on empty input.

for year in [2017, 2018]:
    for month in [4, 5, 6, 7, 8]:

        # First day of this month and of the next one (both inclusive in .loc)
        start = pd.Timestamp(year=year, month=month, day=1)
        end = start + pd.DateOffset(months=1)
        period = '{:%Y-%m-%d} - {:%Y-%m-%d}'.format(start, end)

        sim_data = an_umol.loc[an_umol > 0].loc[start:end].resample('3H').mean()  #.between_time("11:00", "18:00")
        sim_data = sim_data.dropna()  # drop bins left empty by the selection
        measured_data = df_Comb['GPP_f'].loc[start:end].resample('3H').mean()  #.between_time("11:00", "18:00")
        measured_data = measured_data.dropna()

        df_tmp = sim_data.to_frame(name='sim_data')
        df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

        # A regression needs at least two points
        if len(df_tmp) < 2:
            print(period, ' not enough data')
            continue

        # linregress(x, y); r_value is Pearson's coefficient, R^2 is r_value**2
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])
        print(period, ' R2: {:.3f}'.format(r_value**2))


### H2O flux

In [None]:
#def p_dailysum(df):
#    for idx,row in df.iterrows():
#        #calc precipitation sum from 00:00 to 23:59
#        start_day=np.datetime64(str(idx.date())+ ' 00:00')
#        end_day=np.datetime64(str(idx.date())+ ' 23:30')
#        df.loc[idx,'daily_prec']=df.loc[start_day:end_day ,'P(mast)'].sum() #iloc[row, column]
#return df

In [None]:
#old way, deprecated (takes long due to .iterrows() )
#df_ET_1=p_dailysum(df_ET)

In [None]:
# Add the precipitation sum over the last 72 hours to every run frame so it
# can be used for filtering dry periods. Previously only the fstr=0.8, 0.6 and
# 0.4 frames received this column; all five runs are now treated alike.
# (Shorter windows tried earlier: daily sum, 3H, 24H and 48H rolling sums.)
for df_run in (df_ET_10, df_ET_8, df_ET_6, df_ET_4, df_ET_2):
    df_run['last3day_prec'] = df_run['P(mast)'].rolling('72H').sum()

In [None]:
# Re-draw better correlation plot (longer dataset): simulated LE of the
# fstr=0.6 run against measured LE, dry periods only.
start = '2017-04-01 00:00'
end = '2017-07-30 00:00'

# Simulated LE: keep records whose 72-hour rolling precipitation sum is zero,
# average into 3-hour bins and keep the bins labelled 06:00 through 09:00.
sim_data = df_ET_6.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0]
sim_data = sim_data.resample('3H').mean().between_time("6:00", "9:00")
sim_data = sim_data.dropna()

# Measured LE: positive values only, same binning and time window
measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('3H').mean().between_time("6:00", "9:00")
measured_data = measured_data.dropna()

# Keep only the time steps present in both data sets
df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], c=df_tmp.index.hour, cmap='viridis')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='r', label='slope')
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
# Title and subtitle now describe the selection actually made above (they used
# to claim April-Oct, 1-hour means over the whole day and a daily precip sum).
fig.suptitle(
    'Correlation of simulated to measured LE. {} to {}, (3hour mean, 06:00-09:00) \n'
    ' intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(start[:10], end[:10], intercept, slope, r_value**2)
)
ax.set_title('(Only records with zero precipitation in the last 72 hours)')
ax.legend(loc='upper left')
cbar = plt.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Hour')

In [None]:
# Re-draw better correlation plot, coloured by the 'T_sfc_C' column.
start = '2017-04-14 00:00'
end = '2017-08-30 00:00'

# NOTE(review): this cell used to read from `df_ET_1`, which is not defined in
# the visible notebook (its construction is commented out), so the cell failed
# with a NameError. The fstr=1.0 frame `df_ET_10` is assumed to be the
# intended replacement -- confirm.
if 'last3day_prec' not in df_ET_10.columns:
    df_ET_10['last3day_prec'] = df_ET_10['P(mast)'].rolling('72H').sum()

# Simulated data: dry records only (72-hour rolling precipitation sum is
# zero), averaged into 3-hour bins.
# presumably 'T_sfc_C' is added to the frame by calc_LE -- TODO confirm
sim_data = df_ET_10.loc[start:end, ['ET_VPD', 'last3day_prec', 'T_sfc_C']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0.0]
sim_data = sim_data.resample('3H').mean()
sim_data = sim_data.dropna()  # drop bins left empty by the selection
measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('3H').mean()
measured_data = measured_data.dropna()

# Keep only the time steps present in both data sets
df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], c=df_tmp['T_sfc_C'], cmap='viridis')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='r', label='slope')
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
# Period in the title follows start/end instead of a hard-coded "April-Oct"
fig.suptitle(
    'Correlation of simulated to measured LE. {} to {}, (3hour mean, 00:00-23:59) \n'
    ' intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(start[:10], end[:10], intercept, slope, r_value**2)
)
ax.set_title('(Only records with zero precipitation in the last 72 hours)')
ax.legend(loc='upper left')
cbar = plt.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Temp [oC]')

#### quick check R2 for different months

In [None]:
# Check the month-wise slope and R2 of simulated vs. measured LE for some
# years, using only records whose 72-hour rolling precipitation sum is zero.

# NOTE(review): this cell used to read from `df_ET_1`, which is not defined in
# the visible notebook (NameError). The fstr=1.0 frame `df_ET_10` is assumed
# to be the intended replacement -- confirm.
if 'last3day_prec' not in df_ET_10.columns:
    df_ET_10['last3day_prec'] = df_ET_10['P(mast)'].rolling('72H').sum()

for year in [2017, 2018]:
    for month in [4, 5, 6, 7, 8]:

        # First day of this month and of the next one (both inclusive in .loc)
        start = pd.Timestamp(year=year, month=month, day=1)
        end = start + pd.DateOffset(months=1)
        period = '{:%Y-%m-%d} - {:%Y-%m-%d}'.format(start, end)

        sim_data = df_ET_10.loc[start:end, ['ET_VPD', 'last3day_prec']]
        sim_data = sim_data.loc[sim_data['last3day_prec'] == 0.0]
        sim_data = sim_data.resample('3H').mean()  #.between_time("11:00", "18:00")
        sim_data = sim_data.dropna()  # drop bins left empty by the selection
        measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('3H').mean()  #.between_time("11:00", "18:00")
        measured_data = measured_data.dropna()

        df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)

        # A regression needs at least two points; with the default `years`
        # range there is no 2018 data at all, so skip instead of crashing.
        if len(df_tmp) < 2:
            print(period, 'not enough data')
            continue

        # linregress(x, y); r_value is Pearson's coefficient, R^2 is r_value**2
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
        print(period, 'Slope:{:.3f} R2: {:.3f}'.format(slope, r_value**2))


## first draft sensitivity study

In [None]:
# Sensitivity run: A-gs model with raised air temperature, compared with the
# default fstr=1.0 run.

# Offset added to 'Tair' for the perturbed run. The plot labels are built from
# it so they always show the value that was simulated.
# NOTE(review): the labels used to say "Tair+1" while the code added 10; the
# computation is kept as it was -- confirm which offset is intended.
tair_offset = 10
run2_label = 'Tair+{}'.format(tair_offset)

# Prepare data for the A-gs run (only 'Tair' is changed)
df_Comb_filter_1 = df_Comb_filter.copy()
df_Comb_filter_1['Tair'] = df_Comb_filter_1['Tair'] + tair_offset

# Run A-gs model
fstr = 1.0
an_run2, an_umol_run2, rs_run2, ra_run2 = runAgs(df_profile_filter, df_Comb_filter_1, df_meteo_filter, df_EC_filter, fstr=fstr)

# Build the LE input frame for this run. It was previously used before it
# existed, and calc_LE was called with an outdated three-argument form.
df_ET_run2 = init_ETframe(rs_run2)
# init_ETframe fills 'ra' from the notebook-level `ra`; use this run's value
df_ET_run2['ra'] = ra_run2
df_ET_run2 = calc_LE(df_ET_run2)
df_ET_run2['last3day_prec'] = df_ET_run2['P(mast)'].rolling('72H').sum()

# Baseline for the comparison.
# NOTE(review): the original read from `df_ET_1`, which is not defined in the
# visible notebook; the fstr=1.0 frame `df_ET_10` is assumed -- confirm.
if 'last3day_prec' not in df_ET_10.columns:
    df_ET_10['last3day_prec'] = df_ET_10['P(mast)'].rolling('72H').sum()

# plot
start = '2017-04-01 00:00'
end = '2017-07-30 00:00'

# Dry records only (72-hour rolling precipitation sum is zero), hourly means
sim_data = df_ET_10.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0]
sim_data = sim_data.resample('1H').mean()
sim_data = sim_data.dropna()

sim2_data = df_ET_run2.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim2_data = sim2_data.loc[sim2_data['last3day_prec'] == 0]
sim2_data = sim2_data.resample('1H').mean()
sim2_data = sim2_data.dropna()

measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('1H').mean()
measured_data = measured_data.dropna()

# Keep only the time steps present in both simulated and measured data
df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)
df_tmp2 = sim2_data.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(df_tmp2['LE'], df_tmp2['ET_VPD'])
print('R2: ', r_value2**2)
print('slope, intercept:', slope2, intercept2)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], marker='o')  # scatter(x, y)
p2 = ax.scatter(df_tmp2['LE'], df_tmp2['ET_VPD'], marker='^')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='black', label='default')
ax.axline((0.0, intercept2), slope=slope2, c='black', linestyle='dashed', label=run2_label)
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle(
    'Correlation simulated-measured LE. {} to {}, (1hour mean) \n'
    ' default slope = {:.3f}, default R2 = {:.3f} \n'
    ' {} slope = {:.3f}, {} R2 = {:.3f}'.format(
        start[:10], end[:10], slope, r_value**2, run2_label, slope2, run2_label, r_value2**2
    )
)
ax.legend(loc='upper left')



In [None]:
# Sensitivity run: A-gs model with reduced PAR, compared with the default
# fstr=1.0 run.

# Factor applied to 'PAR' for the perturbed run; the plot labels are built
# from it (they used to say "Tair+1", copied from the temperature cell).
par_factor = 0.9
run2_label = 'PAR*{}'.format(par_factor)

# Prepare data for the A-gs run (only 'PAR' is changed)
df_meteo_filter_1 = df_meteo_filter.copy()
df_meteo_filter_1['PAR'] = df_meteo_filter['PAR'] * par_factor

# Run A-gs model
fstr = 1.0
an_run2, an_umol_run2, rs_run2, ra_run2 = runAgs(df_profile_filter, df_Comb_filter, df_meteo_filter_1, df_EC_filter, fstr=fstr)

# Build the LE frame from THIS run's rs/ra. The previous call
# calc_LE(df_ET, rs, ra) referred to names that are not defined in the
# visible notebook and did not use the perturbed run's output.
df_ET_run2 = init_ETframe(rs_run2)
# init_ETframe fills 'ra' from the notebook-level `ra`; use this run's value
df_ET_run2['ra'] = ra_run2
df_ET_run2 = calc_LE(df_ET_run2)
df_ET_run2['last3day_prec'] = df_ET_run2['P(mast)'].rolling('72H').sum()

# Baseline for the comparison.
# NOTE(review): the original read from `df_ET_1`, which is not defined in the
# visible notebook; the fstr=1.0 frame `df_ET_10` is assumed -- confirm.
if 'last3day_prec' not in df_ET_10.columns:
    df_ET_10['last3day_prec'] = df_ET_10['P(mast)'].rolling('72H').sum()

# plot
start = '2017-04-01 00:00'
end = '2017-07-30 00:00'

# Dry records only (72-hour rolling precipitation sum is zero), hourly means
sim_data = df_ET_10.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0]
sim_data = sim_data.resample('1H').mean()
sim_data = sim_data.dropna()

sim2_data = df_ET_run2.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim2_data = sim2_data.loc[sim2_data['last3day_prec'] == 0]
sim2_data = sim2_data.resample('1H').mean()
sim2_data = sim2_data.dropna()

measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('1H').mean()
measured_data = measured_data.dropna()

# Keep only the time steps present in both simulated and measured data
df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)
df_tmp2 = sim2_data.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(df_tmp2['LE'], df_tmp2['ET_VPD'])
print('R2: ', r_value2**2)
print('slope, intercept:', slope2, intercept2)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], marker='o')  # scatter(x, y)
p2 = ax.scatter(df_tmp2['LE'], df_tmp2['ET_VPD'], marker='^')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='black', label='default')
ax.axline((0.0, intercept2), slope=slope2, c='black', linestyle='dashed', label=run2_label)
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle(
    'Correlation simulated-measured LE. {} to {}, (1hour mean) \n'
    ' default slope = {:.3f}, default R2 = {:.3f} \n'
    ' {} slope = {:.3f}, {} R2 = {:.3f}'.format(
        start[:10], end[:10], slope, r_value**2, run2_label, slope2, run2_label, r_value2**2
    )
)
ax.legend(loc='upper left')



In [None]:
# Sensitivity run: A-gs model with increased PAR; correlation of the resulting
# LE with the measured LE.

# Factor applied to 'PAR' for this run
par_factor = 1.1

# Prepare data for the A-gs run. Start from a fresh copy of the unperturbed
# frame: the previous self-assignment re-used whatever `df_meteo_filter_1`
# held from an earlier cell, so the factors compounded (0.9 * 1.1).
df_meteo_filter_1 = df_meteo_filter.copy()
df_meteo_filter_1['PAR'] = df_meteo_filter['PAR'] * par_factor

# Run A-gs model
fstr = 1.0
an_run2, an_umol_run2, rs_run2, ra_run2 = runAgs(df_profile_filter, df_Comb_filter, df_meteo_filter_1, df_EC_filter, fstr=fstr)

# Build the LE frame from THIS run's rs/ra. The previous call
# calc_LE(df_ET, rs, ra) referred to names that are not defined in the
# visible notebook and did not use the perturbed run's output.
df_ET_run3 = init_ETframe(rs_run2)
# init_ETframe fills 'ra' from the notebook-level `ra`; use this run's value
df_ET_run3['ra'] = ra_run2
df_ET_run3 = calc_LE(df_ET_run3)
df_ET_run3['last3day_prec'] = df_ET_run3['P(mast)'].rolling('72H').sum()

# plot
start = '2017-04-01 00:00'
end = '2017-07-30 00:00'

# Dry records only (72-hour rolling precipitation sum is zero), hourly means
sim_data = df_ET_run3.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0]
sim_data = sim_data.resample('1H').mean()  #.between_time("6:00", "9:00")
sim_data = sim_data.dropna()

measured_data = df_Comb['LE'].loc[df_Comb['LE'] > 0].loc[start:end].resample('1H').mean()  #.between_time("6:00", "9:00")
measured_data = measured_data.dropna()

# Keep only the time steps present in both data sets
df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])
print('R2: ', r_value**2)
print('slope, intercept:', slope, intercept)

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], c=df_tmp.index.hour, cmap='viridis')  # scatter(x, y)
ax.axline((0.0, intercept), slope=slope, c='r', label='slope')
ax.axline((0.0, 0.0), slope=1, c='r', linestyle='dashed', label='1:1')
ax.set_ylim(-10, 175)
ax.set_xlim(-10, 175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
# Title now states the run, period and averaging used above (it used to say
# "April-Oct" and "3hour mean" while the data are hourly means).
fig.suptitle(
    'Correlation of simulated (PAR*{}) to measured LE. {} to {}, (1hour mean, 00:00-23:59) \n'
    ' intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(par_factor, start[:10], end[:10], intercept, slope, r_value**2)
)
ax.set_title('(Only records with zero precipitation in the last 72 hours)')
ax.legend(loc='upper left')
cbar = plt.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Hour')

### WIP hourly averages section (hour as bins)

In [None]:
# Draw An binned by hour of day: mean and inter-quartile range of simulated
# and measured values.
#start='2017-04-01 00:00'
#end='2017-08-30 00:00'
start = '2008-04-01 00:00'
end = '2018-08-30 00:00'

an_umol = an_umol_8

# Hourly means of the positive simulated values, months May through August
sim_data = an_umol.loc[an_umol > 0].loc[start:end].resample('1H').mean()  #.between_time("11:00", "18:00")
sim_data = sim_data.loc[(sim_data.index.month >= 5) & (sim_data.index.month <= 8)]
sim_data = sim_data.dropna()  # drop bins left empty by the selection
measured_data = df_Comb['GPP_f'].loc[start:end].resample('1H').mean()  #.between_time("11:00", "18:00")
measured_data = measured_data.dropna()

# Give the simulated column an explicit name: the previous lookup by column
# label 0 only worked when the Series returned by runAgs had no name.
# The inner merge also restricts the measured data to the simulated hours.
df_tmp = sim_data.to_frame(name='sim_data')
df_tmp = df_tmp.merge(measured_data, how='inner', left_index=True, right_index=True)

hours_list = list(range(24))

# Statistics per hour of day; reindex so that hours without data show as NaN
by_hour = df_tmp.groupby(df_tmp.index.hour)
df_avg = pd.DataFrame({
    'meas_avg': by_hour['GPP_f'].mean(),
    'meas_q1': by_hour['GPP_f'].quantile(0.25),
    'meas_q3': by_hour['GPP_f'].quantile(0.75),
    'sim_avg': by_hour['sim_data'].mean(),
    'sim_q1': by_hour['sim_data'].quantile(0.25),
    'sim_q3': by_hour['sim_data'].quantile(0.75),
}).reindex(hours_list)

fig, ax = plt.subplots()
p1 = ax.plot(df_avg['meas_avg'], marker='o', label='mean measured An')
ax.fill_between(hours_list, df_avg['meas_q1'], df_avg['meas_q3'], alpha=0.1, label='IQR')
p2 = ax.plot(df_avg['sim_avg'], marker='s', label='mean simulated An')
ax.fill_between(hours_list, df_avg['sim_q1'], df_avg['sim_q3'], alpha=0.1, label='IQR')

#ax.set_ylim(-5,40)
#ax.set_xlim(-5,40)
plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'An [$\mu molm^{-2}s^{-1}$]')
# The month filter above keeps May-Aug (the title used to say May-Oct)
fig.suptitle('Simulated and measured Assimilation by hour for 2008-2018, \n for months May-Aug, (1hour mean)')
ax.legend(loc='upper left')


In [None]:
# Draw LE binned by hour of day: mean and inter-quartile range of simulated
# (fstr=0.8 run) and measured values.
start = '2008-04-01 00:00'
end = '2018-08-30 00:00'

# Simulated LE: months May through August, dry records only (72-hour rolling
# precipitation sum is zero), then hourly means
sim_data = df_ET_8.loc[start:end, ['ET_VPD', 'last3day_prec']]
sim_data = sim_data.loc[(sim_data.index.month >= 5) & (sim_data.index.month <= 8)]
sim_data = sim_data.loc[sim_data['last3day_prec'] == 0.0]
print(sim_data.size)  # number of values (rows x columns) left after filtering
sim_data = sim_data.resample('1H').mean()
sim_data = sim_data.dropna()  # drop bins left empty by the selection

# Measured LE from the filtered data set: positive values only, hourly means
measured_data = df_Comb_filter.loc[start:end, 'LE']
measured_data = measured_data.loc[measured_data > 0]
measured_data = measured_data.resample('1H').mean()
measured_data = measured_data.dropna()

# Keep only the time steps present in both data sets
df_tmp = sim_data.merge(measured_data, how='inner', left_index=True, right_index=True)

hours_list = list(range(24))

# Statistics per hour of day; reindex so that hours without data show as NaN
by_hour = df_tmp.groupby(df_tmp.index.hour)
df_avg = pd.DataFrame({
    'meas_avg': by_hour['LE'].mean(),
    'meas_q1': by_hour['LE'].quantile(0.25),
    'meas_q3': by_hour['LE'].quantile(0.75),
    'sim_avg': by_hour['ET_VPD'].mean(),
    'sim_q1': by_hour['ET_VPD'].quantile(0.25),
    'sim_q3': by_hour['ET_VPD'].quantile(0.75),
}).reindex(hours_list)

fig, ax = plt.subplots()
p1 = ax.plot(df_avg['meas_avg'], marker='o', label='mean measured LE')
ax.fill_between(hours_list, df_avg['meas_q1'], df_avg['meas_q3'], alpha=0.1, label='IQR')
p2 = ax.plot(df_avg['sim_avg'], marker='s', label='mean simulated LE')
ax.fill_between(hours_list, df_avg['sim_q1'], df_avg['sim_q3'], alpha=0.1, label='IQR')

#ax.set_ylim(-5,40)
#ax.set_xlim(-5,40)
plt.xticks(hours_list)
ax.set_xlabel('hour')
# LE is plotted in W m-2 everywhere else in this notebook; the extra s^-1 in
# the old label was wrong.
ax.set_ylabel(r'LE [$Wm^{-2}$]')
# The month filter above keeps May-Aug (the title used to say May-Oct)
fig.suptitle('Simulated and measured Latent Heat flux by hour for 2008-2018, \n for months May-Aug, (1hour mean)')
ax.legend(loc='upper left')


In [None]:
# Soil-moisture columns for 2017, one line per column on the same axes
soil_2017 = df_soil.loc['2017-01-01':'2017-12-30']
soil_2017['SM-Lit'].plot()
soil_2017['SM-003'].plot()
soil_2017['SM-020'].plot()
soil_2017['SM-050'].plot()
soil_2017['SM-100'].plot()

In [None]:
# Same soil-moisture columns, zoomed in on 17-22 April 2017
soil_april = df_soil.loc['2017-04-17':'2017-04-22']
soil_april['SM-Lit'].plot()
soil_april['SM-003'].plot()
soil_april['SM-020'].plot()
soil_april['SM-050'].plot()
soil_april['SM-100'].plot()