# A-gs model and implementation (simulation CO2 and H2O flux)

## Initialize data and model

### Setup and fetch data

In [None]:
# Settings
Username   = 'Beheerder'  # NOTE(review): not referenced anywhere else in this notebook -- confirm it is still needed
years      = range(2001,2021)    #(1997,2021) # Years handed to the Read_* loaders below; the range end is exclusive, so 2001..2020

In [None]:
import os
datapath   = os.path.join('../../') #since this file is 2 dir down from the data it's ../../
print('datapath is set to %s'%datapath)


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import plotly.express as px
#import cufflinks as cf
import matplotlib.dates as mdate
import matplotlib.ticker as ticker
from matplotlib import cm
#from colorspacious import cspace_converter
import scipy.stats as stats
#cf.go_offline()
# cf.set_config_file(offline=False, world_readable=True)

from datetime import datetime, timedelta
import sys
sys.path.insert(0, os.path.join(datapath,'PythonScripts'))
sys.path.append('../') #append the directory above this one to path to be able to find my python modules

from Loobos_Toolbox import dateparse, dateparse_Gapfilled, Read_LoobosEddFinal, Read_LooStor, Read_LoodatGapfill, Read_Loobos_halfhourly, Read_Loobos_meteo, Read_Loobos_soil, Read_Loobos_profile

from Ags_model import runAgs, runAgs2, calc_LE

In [None]:
# `progress` records which expensive steps already ran in this session, so
# re-executing the cell does not read the files again. To force a reload,
# remove 'dataloaded' from `progress` (or delete `progress`) and re-run.
if 'progress' not in globals():
    progress = list()
if 'dataloaded' not in progress:
    # Read every input file for the selected years
    df_EC = Read_LoobosEddFinal(years, datapath)
    df_Stor = Read_LooStor(years, datapath)
    df_Comb = Read_LoodatGapfill(years, datapath)
    df_NEE = Read_Loobos_halfhourly(years, datapath)
    df_meteo = Read_Loobos_meteo(years, datapath)
    df_soil = Read_Loobos_soil(years, datapath)
    df_profile = Read_Loobos_profile(years, datapath)
    progress.append('dataloaded')

In [None]:
# Make filter for GPP original data and not gapfilled
# General filter: GPP quality flag equal to 0 and PAR above zero
I = ((df_Comb['GPP_fqc']==0)&(df_meteo['PAR']>0))
# Optional exclusion of a period (disabled):
#t = df_profile.index
#time = (t < np.datetime64('2013-05-08')) | (t > np.datetime64('2013-06-01'))

# Filter for CO2 data
CO2 = (df_profile['CO2level1'] > 300)

# Filter for L(o)corr data
Locorr = (df_meteo['L(o)corr']>0)

# Filter for VPD data
VPD = (df_Comb['VPD']>=0)

# Filter for U-star
Ustar = (df_EC['U-star']>=0)

# Combine all filters. The mask is deliberately not called `filter`: that
# name would shadow the builtin filter() for the rest of the session.
combined_filter = I & CO2 & Locorr & VPD & Ustar

# Apply the same combined mask to every input dataframe so that the four
# filtered frames keep identical rows.
# Column 'CO2' is input from df_profile
df_profile_filter = df_profile[combined_filter]

# Columns 'L(o)corr' and 'PAR' are inputs from df_meteo
df_meteo_filter = df_meteo[combined_filter]

# Columns 'VPD' and 'Tair' are inputs from df_Comb
df_Comb_filter = df_Comb[combined_filter]

# Columns 'Mea_Windsp' and 'U-star' are inputs from df_EC
df_EC_filter = df_EC[combined_filter]

In [None]:
from FilterData import Filter_wrap
# Rebinds CO2, Locorr, VPD, Ustar and the four *_filter dataframes, replacing the
# values of the same names built by hand in the previous cell.
# NOTE(review): presumably filterversion='default' reproduces that manual filter -- confirm in FilterData.
CO2,Locorr,VPD,Ustar,df_profile_filter,df_meteo_filter,df_Comb_filter,df_EC_filter=Filter_wrap(df_Comb,df_profile,df_meteo,df_EC,filterversion='default')
#NOTE: df_Stor is NOT FILTERED

In [None]:
from FilterData import Filter_GPP_LE_NEE_VPD 
# Second filtered copy of df_Comb; it is not used by any later cell shown in this notebook.
# NOTE(review): presumably fqc=None disables the quality-flag criterion -- confirm in FilterData.
df_Comb_filter2 = Filter_GPP_LE_NEE_VPD(df_Comb,fqc=None)

### Run A-gs model

In [None]:
# Run A-gs model
# The suffix of the result names encodes the fstr value of the run (_10 for fstr=1.0).
# Later cells use an_umol_10 (CO2 flux plots), rs_10 and ra (both consumed by init_ETframe).

an_final_10,an_umol_10,rs_10, ra = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter,df_EC_filter,fstr=1.0)
# Disabled runs with other fstr values; note that each of them would rebind `ra` as well.
#an_final_8,an_umol_8,rs_8, ra = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter,df_EC_filter,fstr=0.8)
#an_final_6,an_umol_6,rs_6, ra = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter,df_EC_filter,fstr=0.6)
#an_final_4,an_umol_4,rs_4, ra = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter,df_EC_filter,fstr=0.4)
#an_final_2,an_umol_2,rs_2, ra = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter,df_EC_filter,fstr=0.2)

#an_final_g,an_umol_g,rs_g, ra_g = runAgs(df_profile_filter,df_Comb_filter,df_meteo_filter,df_EC_filter,fstr=0.8,gmin_input=0.01985)




## Calculate ET

### Assemble dataframe 'df_ET' that will hold output and fill with inputs

In [None]:
def init_ETframe(rs_series, ra_series=None):
    """Assemble the dataframe that is handed to calc_LE.

    The frame is built from the unfiltered notebook globals ``df_meteo``,
    ``df_profile`` and ``df_Comb`` and extended with derived and model columns.

    Parameters
    ----------
    rs_series : pandas.Series
        Resistance series returned by runAgs; stored as column 'rs'
        (aligned on the index).
    ra_series : pandas.Series, optional
        Resistance series stored as column 'ra'. Defaults to the notebook
        global ``ra`` (as returned by runAgs), which keeps existing
        single-argument calls working.

    Returns
    -------
    pandas.DataFrame
        Columns 'L(o)', 'Te-L(o)', 'Pressure', 'VPD', 'rH', 'P(mast)',
        'p_kPa', 'VPD_adj', 'rs' and 'ra'.
    """
    if ra_series is None:
        # Backward-compatible default: fall back to the global produced by runAgs
        ra_series = ra

    df_ET = pd.concat(
        [
            df_meteo['L(o)'],
            df_meteo['Te-L(o)'],
            df_profile['Pressure'],
            df_Comb['VPD'],
            df_Comb['rH'],
            df_meteo['P(mast)'],
        ],
        axis=1,
        sort=False,
    )
    # convert Pressure from hPa to kPa
    df_ET['p_kPa'] = df_ET['Pressure'] / 10
    # Some outlier values for VPD are negative: keep only strictly positive
    # values (everything else becomes NaN) and convert hPa to kPa.
    df_ET['VPD_adj'] = df_ET['VPD'].where(df_ET['VPD'] > 0) / 10
    # Series are aligned on the index when assigned as columns
    df_ET['rs'] = rs_series
    df_ET['ra'] = ra_series
    return df_ET

In [None]:
# Build the ET input frame for the fstr=1.0 run (rs_10 comes from runAgs).
# The variants for the other runs are disabled together with those runs.
df_ET_10=init_ETframe(rs_10)
#df_ET_8=init_ETframe(rs_8)
#df_ET_6=init_ETframe(rs_6)
#df_ET_4=init_ETframe(rs_4)
#df_ET_2=init_ETframe(rs_2)
#df_ET_g=init_ETframe(rs_g)

In [None]:
# Run calc_LE (imported from Ags_model) on the assembled frame and keep its result.
# Later cells read column 'ET_VPD' from df_ET_10 -- presumably added by calc_LE, since init_ETframe does not create it.
df_ET_10=calc_LE(df_ET_10)
#df_ET_8=calc_LE(df_ET_8)
#df_ET_6=calc_LE(df_ET_6)
#df_ET_4=calc_LE(df_ET_4)
#df_ET_2=calc_LE(df_ET_2)
#df_ET_g=calc_LE(df_ET_g)

In [None]:
#df_ET_10['L(o)_corr'].resample('M').mean().plot(label='mine',legend='y')
#df_meteo_filter['L(o)corr'].resample('M').mean().plot(label='dataset',legend='y')

In [None]:
# Monthly means of the filtered measurements, using only rows with non-negative LE
df_ET_meas = df_Comb_filter.loc[df_Comb_filter['LE'] >= 0].resample("M").mean()
#df_ET_meas['LE'].plot()

In [None]:
# 3-hourly means of the ET frame, keeping only the bins whose label lies between 09:00 and 15:00
df_ET_3h=df_ET_10.resample('3H').mean().between_time('9:00','15:00')
df_ET_3h  # bare expression: displays the frame in the notebook

In [None]:
#filter for rain
# Read the daily rain file and index it by its 'date' column
knmi_rain_daily=pd.read_csv('./knmi_rain_daily.csv')
knmi_rain_daily.index=pd.to_datetime(knmi_rain_daily['date'],format='%Y-%m-%d')
# Flag the days whose 'RD' value is exactly zero
knmi_rain_daily['is_zero']=knmi_rain_daily['RD']==0.0
rain_is_zero=knmi_rain_daily.loc[:,['is_zero']]
# Upsample to 3-hour steps; forward fill gives every 3-hour bin the flag of its day
rain_is_zero=rain_is_zero.resample('3H').ffill()
# Keep an unrestricted copy: the LE correlation cells merge on it over all hours of the day
rain_is_zero_corr=rain_is_zero.copy()
# Same time-of-day window as df_ET_3h, so that the mask lines up with it
rain_is_zero=rain_is_zero.between_time('9:00','15:00')
df_ET_3h=df_ET_3h.loc['2001-01-01':'2020-12-30'] #the knmi dataset is 1 day shorter
# Keep only the 3-hour bins that fall on a day without rain
# NOTE(review): a boolean Series mask must cover every index label of df_ET_3h -- confirm both cover the same period
df_ET_3h=df_ET_3h.loc[rain_is_zero['is_zero']==True]
#df_ET_3h

In [None]:
#compare the difference between the two DatetimeIndex objects
#C = df_ET_3h[~df_ET_3h.index.isin(rain_is_zero.index)]

### calculate ET

## Correlation plots

### CO2 flux

In [None]:
an_umol = an_umol_10  # select the fstr=1.0 run as the simulated CO2 flux for the plots below

In [None]:
# Correlation of simulated against measured GPP (3-hour means, growing season)
#start='2017-05-01 00:00'
#end='2017-05-30 00:00'
start='2008-05-01 00:00'
end='2018-09-30 00:00'

# Simulated flux: positive values only, 3-hour means, months April-September
sim_data=an_umol.loc[an_umol>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "16:00")
sim_data = sim_data.loc[(sim_data.index.month>=4) & (sim_data.index.month<=9)]
sim_data=sim_data.dropna() #drop the empty 3-hour bins that resampling the filtered series leaves behind

# Measured flux: positive values only, 3-hour means
measured_data=df_Comb['GPP_f'].loc[start:end]
measured_data=measured_data.loc[measured_data>0]
measured_data=measured_data.resample('3H').mean()#.between_time("11:00", "16:00")
measured_data=measured_data.dropna()

# The inner merge keeps only the 3-hour bins that exist in both series
df_tmp = sim_data.to_frame(name='sim_data')
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
#print('R2: ',r_value**2)
#print('slope, intercept:', slope, intercept)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['GPP_f'],df_tmp['sim_data'],c=df_tmp.index.hour,s=0.7,cmap='viridis') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
#ax.set_ylim(-5,40)
#ax.set_xlim(-5,40)
ax.set_xlabel(r'Observed GPP [$\mu molm^{-2}s^{-1}$]')
ax.set_ylabel(r'Simulated GPP [$\mu molm^{-2}s^{-1}$]')
# Take the years for the title from start/end, so the title always matches the plotted period
period = '{}-{}'.format(start[:4], end[:4])
fig.suptitle('Correlation of simulated to measured GPP flux (3-hour mean) \n for {} growing season (Apr-Sep)'.format(period))
print('intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Hour')
ax.grid()

In [None]:
# Correlation of simulated against measured GPP, morning bins only (06:00-12:00)
#start='2017-05-01 00:00'
#end='2017-05-30 00:00'
start='2008-05-01 00:00'
end='2018-09-30 00:00'

# Simulated flux: positive values only, 3-hour means between 06:00 and 12:00, months April-September
sim_data=an_umol.loc[an_umol>0].loc[start:end].resample('3H').mean().between_time("06:00", "12:00")
sim_data = sim_data.loc[(sim_data.index.month>=4) & (sim_data.index.month<=9)]
sim_data=sim_data.dropna() #drop the empty 3-hour bins that resampling the filtered series leaves behind

# Measured flux: positive values only, 3-hour means. No time-of-day filter is
# needed here because the inner merge below keeps only the bins present in sim_data.
measured_data=df_Comb['GPP_f'].loc[start:end]
measured_data=measured_data.loc[measured_data>0]
measured_data=measured_data.resample('3H').mean()#.between_time("11:00", "16:00")
measured_data=measured_data.dropna()

df_tmp = sim_data.to_frame(name='sim_data')
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
#print('R2: ',r_value**2)
#print('slope, intercept:', slope, intercept)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['GPP_f'],df_tmp['sim_data'],c=df_tmp.index.hour,s=0.7,cmap='viridis') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
#ax.set_ylim(-5,40)
#ax.set_xlim(-5,40)
ax.set_xlabel(r'measured GPP [$\mu molm^{-2}s^{-1}$]')
ax.set_ylabel(r'A-gs simulated GPP [$\mu molm^{-2}s^{-1}$]')
# Take the years for the title from start/end, so the title always matches the plotted period
period = '{}-{}'.format(start[:4], end[:4])
fig.suptitle('Correlation of simulated to measured GPP flux (3-hour mean between 06:00-12:00) \n for {} growing season (Apr-Sep)'.format(period))
print('intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Hour')
ax.grid()

### H2O flux

In [None]:
# Correlation of simulated against measured LE over the long dataset:
# 3-hour means, months April-September, days without rain only.
start, end = '2001-05-01 00:00', '2020-09-30 00:00'

# Simulated LE: 3-hour means restricted to the growing-season months
sim_data = df_ET_10.loc[start:end, ['ET_VPD']].resample('3H').mean()
sim_data = sim_data.loc[sim_data.index.month.isin(range(4, 10))]
sim_data = sim_data.dropna()

# Measured LE: positive values only, 3-hour means
measured_data = df_Comb.loc[df_Comb['LE'] > 0, 'LE'].loc[start:end]
measured_data = measured_data.resample('3H').mean().dropna()

# Inner merges keep the bins present in the simulation, the measurements and the rain flags
df_tmp = (
    sim_data
    .merge(measured_data, how='inner', left_index=True, right_index=True)
    .merge(rain_is_zero_corr['is_zero'], how='inner', left_index=True, right_index=True)
)
df_tmp = df_tmp[df_tmp['is_zero'] == True]

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], s=0.7, c=df_tmp.index.hour, cmap='viridis')
ax.axline((0.0, intercept), slope=slope, color='r', label='slope')
ax.axline((0.0, 0.0), slope=1, color='r', linestyle='dashed', label='1:1')
ax.set_xlabel(r'Observed LE [Wm$^{-2}$]')
ax.set_ylabel(r'Simulated LE [Wm$^{-2}$]')
fig.suptitle('Correlation of simulated to measured LE flux (3-hour mean) \n for 2001-2020 growth seasons (Apr-Sep), non-rainy days only')
print('intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.legend(loc='upper left')
cbar = fig.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Hour')

In [None]:
# Correlation of simulated against measured LE, morning bins only (06:00-12:00):
# 3-hour means, months April-September, days without rain only.
start, end = '2001-05-01 00:00', '2020-09-30 00:00'

# Simulated LE: 3-hour means in the morning window, growing-season months
sim_data = df_ET_10.loc[start:end, ['ET_VPD']].resample('3H').mean()
sim_data = sim_data.between_time("6:00", "12:00")
sim_data = sim_data.loc[sim_data.index.month.isin(range(4, 10))]
sim_data = sim_data.dropna()

# Measured LE: positive values only, 3-hour means in the same window
measured_data = df_Comb.loc[df_Comb['LE'] > 0, 'LE'].loc[start:end]
measured_data = measured_data.resample('3H').mean().between_time("6:00", "12:00")
measured_data = measured_data.dropna()

# Inner merges keep the bins present in the simulation, the measurements and the rain flags
df_tmp = (
    sim_data
    .merge(measured_data, how='inner', left_index=True, right_index=True)
    .merge(rain_is_zero_corr['is_zero'], how='inner', left_index=True, right_index=True)
)
df_tmp = df_tmp[df_tmp['is_zero'] == True]

# linregress(x, y); r_value is Pearson's coefficient, so R^2 is r_value**2
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])

fig, ax = plt.subplots()
p1 = ax.scatter(df_tmp['LE'], df_tmp['ET_VPD'], s=0.7, c=df_tmp.index.hour, cmap='viridis')
ax.axline((0.0, intercept), slope=slope, color='r', label='slope')
ax.axline((0.0, 0.0), slope=1, color='r', linestyle='dashed', label='1:1')
ax.set_xlabel(r'measured LE [Wm$^{-2}$]')
ax.set_ylabel(r'A-gs simulated LE [Wm$^{-2}$]')
fig.suptitle('Correlation of simulated to measured LE flux (3-hour mean between 06:00-12:00) \n for 2001-2020 growth seasons (Apr-Sep), non-rainy days only')
print('intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.legend(loc='upper left')
cbar = fig.colorbar(p1, ax=ax)
cbar.ax.set_ylabel('Hour')

In [None]:
#re-draw better correlation plot (plot leaf temperature)
start='2017-04-14 00:00'
end='2017-08-30 00:00'

#new filter
# Use df_ET_10: it is the only ET dataframe this notebook builds (`df_ET_1` is never defined).
# NOTE(review): columns 'last3day_prec' and 'T_sfc_C' are not created in this notebook itself;
# presumably calc_LE provides them -- confirm, otherwise this selection raises a KeyError.
sim_data = df_ET_10.loc[start:end,['ET_VPD','last3day_prec','T_sfc_C']]
# Keep only the time steps without precipitation in the preceding three days
sim_data = sim_data.loc[sim_data['last3day_prec']==0.0]
sim_data= sim_data.resample('3H').mean()
sim_data=sim_data.dropna() #drop the empty 3-hour bins that resampling the filtered frame leaves behind
# Measured LE: positive values only, 3-hour means
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()
measured_data=measured_data.dropna()

# The inner merge keeps only the 3-hour bins that exist in both datasets
df_tmp=sim_data.copy()
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

fig,ax = plt.subplots()
# Points are coloured by the 'T_sfc_C' column
p1=ax.scatter(df_tmp['LE'],df_tmp['ET_VPD'],c=df_tmp['T_sfc_C'],cmap='viridis') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
ax.set_ylim(-10,175)
ax.set_xlim(-10,175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle('Correlation of simulated to measured LE. April-Oct 2017, (3hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.set_title('(Only days on which daily  sum of Precip. is 0)')
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Temp [oC]')

## Hourly averages section (hour as bins)

In [None]:
#draw binned GPP: mean and interquartile range per hour of day, simulated versus measured
start='2001-04-01 00:00'
end='2020-09-30 00:00'

an_umol=an_umol_10

# Simulated flux: positive values only, hourly means, months April-September
sim_data=an_umol.loc[an_umol>0].loc[start:end].resample('1H').mean()#.between_time("11:00", "18:00")
sim_data = sim_data.loc[(sim_data.index.month>=4) & (sim_data.index.month<=9)]
sim_data=sim_data.dropna() #drop the empty hourly bins that resampling the filtered series leaves behind
# Measured flux: hourly means (not restricted to positive values)
measured_data=df_Comb['GPP_f'].loc[start:end].resample('1H').mean()#.between_time("11:00", "18:00")
measured_data=measured_data.dropna()

# Give the simulated column an explicit name: pd.DataFrame(series) names the
# column after the Series (0 only when the Series is unnamed), so looking the
# column up as `0` breaks as soon as runAgs returns a named Series.
df_tmp=sim_data.to_frame(name='sim_data')
# The inner merge keeps only the hours present in both series
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

hours_list=list(range(24))

# One groupby over the hour of day replaces a separate boolean scan per hour
# and statistic; reindexing guarantees a row (NaN if empty) for all 24 hours.
by_hour = df_tmp.groupby(df_tmp.index.hour)
df_avg = pd.DataFrame({
    'meas_avg': by_hour['GPP_f'].mean(),
    'meas_q1': by_hour['GPP_f'].quantile(0.25),
    'meas_q3': by_hour['GPP_f'].quantile(0.75),
    'sim_avg': by_hour['sim_data'].mean(),
    'sim_q1': by_hour['sim_data'].quantile(0.25),
    'sim_q3': by_hour['sim_data'].quantile(0.75),
}).reindex(hours_list)

fig,ax = plt.subplots()
p1=ax.plot(df_avg['meas_avg'],marker='o',label='mean measured GPP')
ax.fill_between(hours_list,df_avg['meas_q1'],df_avg['meas_q3'],alpha=0.1,label='IQR')
p2=ax.plot(df_avg['sim_avg'],marker='s',label='mean A-gs simulated GPP')
ax.fill_between(hours_list,df_avg['sim_q1'],df_avg['sim_q3'],alpha=0.1,label='IQR')

#ax.set_ylim(-5,40)
#ax.set_xlim(-5,40)
plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'GPP [$\mu molm^{-2}s^{-1}$]')

fig.suptitle('Simulated and measured GPP flux 1-hour means, \n for 2001-2020 growth seasons (Apr-Sep)')
ax.legend(loc='upper left')
ax.grid(axis='y')


In [None]:
# new alternate plot: hourly means of the ET frame on days without rain
df_ET_1h=df_ET_10.resample('1H').mean()
df_ET_1h=df_ET_1h.loc['2001-01-01':'2020-12-30'] #same window as used for the 3-hourly frame
# Build the hourly rain flag here: `rain_is_zero_1h` is not defined anywhere
# else in the notebook. Forward fill gives every hour the flag of its day.
rain_is_zero_1h=knmi_rain_daily.loc[:,['is_zero']].resample('1H').ffill()
# Align the flag to the ET index; hours without rain information count as not dry
# and are dropped, instead of raising an unalignable-mask error.
dry_hour=rain_is_zero_1h['is_zero'].reindex(df_ET_1h.index, fill_value=False)
df_ET_1h=df_ET_1h.loc[dry_hour==True]
#df_ET_1h

In [None]:
# Binned LE: mean and interquartile range per hour of day, simulated versus measured
start, end = '2001-04-01 00:00', '2020-09-30 00:00'

# Simulated LE from the hourly, rain-filtered frame; months April-September
sim_data = df_ET_1h.loc[start:end, ['ET_VPD']]
sim_data = sim_data.loc[sim_data.index.month.isin(range(4, 10))]
sim_data = sim_data.dropna()

# Measured LE from the filtered dataset: positive values only, hourly means
measured_data = df_Comb_filter.loc[start:end, 'LE']
measured_data = measured_data[measured_data > 0].resample('1H').mean()
measured_data = measured_data.dropna()

# The inner merge keeps only the hours present in both datasets
df_tmp = pd.DataFrame(sim_data).merge(measured_data, how='inner', left_index=True, right_index=True)

hours_list = list(range(0, 24))

# Collect the statistics hour by hour
d = {key: [] for key in ('meas_avg', 'meas_q1', 'meas_q3', 'sim_avg', 'sim_q1', 'sim_q3')}
for hour in hours_list:
    in_hour = df_tmp.loc[df_tmp.index.hour == hour]
    d['meas_avg'].append(in_hour['LE'].mean())
    d['meas_q1'].append(in_hour['LE'].quantile(q=0.25))
    d['meas_q3'].append(in_hour['LE'].quantile(q=0.75))
    d['sim_avg'].append(in_hour['ET_VPD'].mean())
    d['sim_q1'].append(in_hour['ET_VPD'].quantile(q=0.25))
    d['sim_q3'].append(in_hour['ET_VPD'].quantile(q=0.75))

df_avg = pd.DataFrame(d)

fig, ax = plt.subplots()
p1 = ax.plot(df_avg['meas_avg'], label='mean measured LE', marker='o')
ax.fill_between(hours_list, df_avg['meas_q1'], df_avg['meas_q3'], label='IQR', alpha=0.1)
p2 = ax.plot(df_avg['sim_avg'], label='mean A-gs simulated LE', marker='s')
ax.fill_between(hours_list, df_avg['sim_q1'], df_avg['sim_q3'], label='IQR', alpha=0.1)

ax.set_ylim(-20, 420)
plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'LE [$Wm^{-2}$]')
fig.suptitle('Simulated and measured Latent Heat flux 1-hour means, \n for 2001-2020 growth seasons (Apr-Sep), non-rainy days only')
ax.legend(loc='upper left')
ax.grid(axis='y')


### PAR plot by hour

In [None]:
df_meteo['PAR']  # bare expression: displays the PAR column in the notebook

In [None]:
#draw binned PAR: mean and interquartile range per hour of day
start='2001-04-01 00:00'
end='2020-09-30 00:00'

# Hourly means of PAR, months April-September
par_data = df_meteo.loc[start:end,['PAR']]
par_data = par_data.resample('1H').mean()
par_data = par_data.loc[(par_data.index.month>=4) & (par_data.index.month<=9)]
par_data=par_data.dropna() #drop the hourly bins without any data


hours_list=list(range(24))

# One groupby over the hour of day; reindexing guarantees a row for all 24 hours
by_hour = par_data.groupby(par_data.index.hour)['PAR']
df_avg = pd.DataFrame({
    'meas_avg': by_hour.mean(),
    'meas_q1': by_hour.quantile(0.25),
    'meas_q3': by_hour.quantile(0.75),
}).reindex(hours_list)

fig,ax = plt.subplots()
# The legend label names the plotted quantity (PAR, not LE)
p1=ax.plot(df_avg['meas_avg'],marker='o',label='mean measured PAR')
ax.fill_between(hours_list,df_avg['meas_q1'],df_avg['meas_q3'],alpha=0.1,label='IQR')

#ax.set_ylim(-20,420)
#ax.set_xlim(-5,40)
plt.xticks(hours_list)
ax.set_xlabel('hour')
ax.set_ylabel(r'PAR')
fig.suptitle('PAR available (1-hour means), \n for 2001-2020 growth seasons (Apr-Sep)')
ax.legend(loc='upper left')
ax.grid(axis='y')
