# A-gs model and implementation (simulation CO2 and H2O flux)

## Initialize data and model

### Setup and fetch data

In [None]:
# Settings
# NOTE(review): Username is not referenced anywhere else in the visible notebook.
Username   = 'Beheerder'
# range(2017,2021) yields 2017, 2018, 2019 and 2020 (the upper bound is exclusive).
years      = range(2017,2021)    #(1997,2021) # Set years to download

In [None]:
import os
# Root folder of the project data, relative to the notebook's working directory.
# It is passed to the Read_* loaders and is also the parent of the 'PythonScripts' folder.
datapath   = os.path.join('../')
print('datapath is set to %s'%datapath)

# !pip install numpy
# !pip install pandas
# !pip install matplotlib
# !pip install plotly 
# !pip install cufflinks
#!pip install colorspacious
#!pip install seaborn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import plotly.express as px
#import cufflinks as cf
import matplotlib.dates as mdate
import matplotlib.ticker as ticker
from matplotlib import cm
#from colorspacious import cspace_converter
import scipy.stats as stats
#cf.go_offline()
# cf.set_config_file(offline=False, world_readable=True)

from datetime import datetime, timedelta
import sys
sys.path.insert(0, os.path.join(datapath,'PythonScripts'))
from Loobos_Toolbox import dateparse, dateparse_Gapfilled, Read_LoobosEddFinal, Read_LooStor, Read_LoodatGapfill, Read_Loobos_halfhourly, Read_Loobos_meteo, Read_Loobos_soil, Read_Loobos_profile


In [None]:
# Guard against re-reading all files every time this cell is re-run.
# To force a reload, comment out the two `if` statements below (and dedent the body).
if 'progress' not in globals():
    progress = []
if 'dataloaded' not in progress:
    # Read files
    df_EC = Read_LoobosEddFinal(years, datapath)
    df_Stor = Read_LooStor(years, datapath)
    df_Comb = Read_LoodatGapfill(years, datapath)
    df_NEE = Read_Loobos_halfhourly(years, datapath)
    df_meteo = Read_Loobos_meteo(years, datapath)
    df_soil = Read_Loobos_soil(years, datapath)
    df_profile = Read_Loobos_profile(years, datapath)
    # Remember that loading succeeded so the next run of this cell skips it.
    progress.append('dataloaded')

In [None]:
# Make filter for GPP original data and not gap-filled
# NOTE(review): this mask is recomputed, unchanged, under "General filter" below.
I = ((df_Comb['GPP_fqc']==0)&(df_meteo['PAR']>0))

# Filter for CO2 data
t = df_profile.index                                          
# Exclude the window 2013-05-08 .. 2013-06-01 (positional boolean array built from df_profile's index).
time = (t < np.datetime64('2013-05-08')) | (t > np.datetime64('2013-06-01'))
# Keep only rows where the level-1 CO2 reading exceeds 300.
CO2 = (df_profile['CO2level1'] > 300)

#General filter
I = ((df_Comb['GPP_fqc']==0)&(df_meteo['PAR']>0))

# The three masks are applied one after another to each frame.
# NOTE(review): `time` is applied positionally, so every frame is assumed to have the same
# number of rows as df_profile; `CO2` and `I` are label-aligned by pandas to the already
# shortened frame (pandas may warn that the boolean key is reindexed) - confirm the indexes match.
df_meteo_CO2 = df_meteo[time][CO2]
df_meteo_filter = df_meteo_CO2[I]
# print(df_meteo_CO2_filter['PAR'])

df_Comb_CO2 = df_Comb[time][CO2]
df_Comb_filter = df_Comb_CO2[I]
# print(df_Comb_CO2_filter['GPP_f'])

df_profile_CO2 = df_profile[time][CO2]
df_profile_filter = df_profile_CO2[I]
# print(df_profile_CO2_filter['CO2level1'])

df_EC_CO2 = df_EC[time][CO2]
df_EC_filter = df_EC_CO2[I]

In [None]:
# Filtered GPP converted to a mass flux: divide by 1e6, multiply by the molar mass of CO2 (44.01 g/mol), then by 1000.
GPP_f_mg = df_Comb_filter['GPP_f']/1000000 * 44.01 * 1000 # from umolm-2s-1 to molm-2s-1, to gm-2s-1, to mgm-2s-1


### Define A-gs model code

In [None]:
# CODE after optimisation

#Calculate canopy resistance and CO2 assimilation/respiration
def runAgs():
    """Run the A-gs scheme on the filtered Loobos data.

    Reads the module-level frames ``df_profile_filter``, ``df_Comb_filter``,
    ``df_meteo_filter`` and ``df_EC_filter`` and computes canopy resistance
    and CO2 assimilation with the A-gs formulation (equation numbers in the
    comments refer to the IFS documentation; some formulas follow DALES).

    Returns
    -------
    tuple
        ``(An_final, An_umol, rs, ra)`` where
        ``An_final`` is the net CO2 flux into the plant in mg/m2/s,
        ``An_umol`` is the same flux converted to umol/m2/s,
        ``rs`` is the surface resistance for moisture (1 / (1.6 * gcco2)) and
        ``ra`` is the aerodynamic resistance U / u*^2 from the EC observations.
    """

    def E1(x):
        """Approximate the exponential integral E1(x).

        Closed-form approximation of Barry, Parlange & Li (2000).  The
        auxiliary function ``h`` needs the ``h_inf * q / (1 + q)`` term;
        without it the result is off by roughly 10 % around x = 1.
        """
        euler = 0.5772156649015329
        G = np.exp(-euler)
        b = (2 * (1 - G) / (G * (2 - G))) ** 0.5
        h_inf = (1 - G) * (G ** 2 - 6 * G + 12) / (3 * G * (2 - G) ** 2 * b)
        q = 20 / 47 * x ** ((31 / 26.) ** 0.5)
        h = 1 / (1 + x * x ** 0.5) + h_inf * q / (1 + q)
        return (np.exp(-x) / (G + (1 - G) * np.exp(-x / (1 - G)))
                * np.log(1 + G / x - (1 - G) / (h + b * x) ** 2))

    # --- Inputs -----------------------------------------------------------
    co2_ppm   = df_profile_filter['CO2level1']
    epsi      = 1.       # emissivity used for the surface temperature
    sigma     = 5.67E-8  # Stefan-Boltzmann constant
    Tair_K    = df_Comb_filter['Tair'] + 273.  # air temperature
    # Surface temperature from outgoing longwave; this is the temperature
    # the model should use.
    Ts_K      = ((df_meteo_filter['L(o)corr'] / (epsi * sigma)) ** 0.25)
    Ts_C      = Ts_K - 273.
    conv_fac  = 101.3 / (8.314 * Tair_K)      # conversion factor from the ideal gas law
    co2_mgm3  = (co2_ppm * 44.01) * conv_fac  # ppm * molar mass CO2 * conversion factor -> mg/m3
    Ts        = Ts_C

    # --- Fixed constants --------------------------------------------------
    Q10gm     = 2.0   # parameter for the mesophyll conductance
    Q10am     = 2.0   # parameter for max primary productivity
    Q10gamma  = 2     # parameter for the CO2 compensation concentration (2 in IFS, 1.5 in DALES)

    # Reference temperatures for the mesophyll conductance (degrees C)
    T1gm      = 273 - 273
    T2gm      = 309 - 273   # IFS=309, DALES=301 (default)

    # Reference temperatures for max primary productivity (degrees C)
    T1Am      = 273 - 273   # IFS=281, DALES=286 (C4)
    T2Am      = 313 - 273   # IFS=309, DALES=301

    nuco2q    = 1.6            # ratio molecular viscosity water to carbon dioxide
    gmin      = 0.25 / 1000.   # cuticular (minimum) conductance (m/s); g_cu in IFS differs by a factor 1000
    ad        = 0.07           # regression coefficient to calculate Cfrac (kPa-1)
    Kx        = 0.7            # extinction coefficient PAR (m ground / m leaf)

    epsilon0  = 0.0144         # maximum quantum use efficiency, mgCO2 / J PAR (also named alpha)

    # --- Vegetation specific constants -------------------------------------
    gm298_umol  = 0.09                      # from literature: Knauer et al. 2018
    gm298       = gm298_umol / conv_fac     # converted to (mm/s)
    Ammax298    = 2.6                       # CO2 maximal primary productivity
    f0          = 0.89                      # maximum value of Cfrac
    co2_comp298 = (42 * 44.01) * (1 / 24.45)  # 42 ppm converted to mg/m3

    LAI         = 2.1                       # leaf area index (m2 m-2), Loobos measurements 2021

    # --- Leaf-level A-gs ----------------------------------------------------
    # CO2 compensation concentration (IFS eq. 8.92), mg/m3: the CO2
    # concentration at which net assimilation of a fully lit leaf is zero.
    co2_comp = co2_comp298 * Q10gamma ** ((Ts - 25) / 10)

    # Mesophyll conductance (IFS eq. 8.93): transport of CO2 from the
    # substomatal cavities to the mesophyll cells.
    gm = (gm298 * Q10gm ** ((Ts - 25) / 10)) / (
        (1. + np.exp(0.3 * (T1gm - Ts))) * (1. + np.exp(0.3 * (Ts - T2gm))))
    gm = gm / 1000.  # convert to m/s

    # Minimum value of Cfrac (IFS formulation)
    fmin = gmin / (gmin + gm)

    VPD    = df_Comb_filter['VPD'] / 10  # Loobos VPD converted to kPa (Ds in DALES)
    VPDmax = (f0 - fmin) / ad            # kPa (Dmax in DALES)

    # Ratio of the CO2 concentration inside the leaf to that at the leaf
    # surface (f in IFS).
    cfrac = f0 * (1 - VPD / VPDmax) + fmin * (VPD / VPDmax)

    co2_abs = co2_mgm3                               # absolute CO2 concentration (mg/m3)
    ci      = cfrac * (co2_abs - co2_comp) + co2_comp  # CO2 concentration in the leaf (mg/m3)

    # Max gross primary production in high light conditions (eq. 8.94), mg/m2/s
    Ammax = (Ammax298 * Q10am ** ((Ts - 25) / 10)) / (
        (1. + np.exp(0.3 * (T1Am - Ts))) * (1. + np.exp(0.3 * (Ts - T2Am))))

    # Gross assimilation rate (IFS eq. 8.97), mg/m2/s
    Am = Ammax * (1 - np.exp(-(gm * (ci - co2_comp) / Ammax)))

    # Autotrophic dark respiration (IFS eq. 8.99), mg/m2/s
    Rdark = Am / 9

    # Photosynthetically active radiation, converted from umol m-2 s-1 to J m-2 s-1
    PAR = df_meteo_filter['PAR'] * 0.22

    # Quantum use efficiency (DALES formulation), mgCO2 / J PAR
    epsilon = epsilon0 * (co2_abs - co2_comp) / (co2_abs + 2. * co2_comp)

    # Leaf-level gross primary productivity (eq. 8.98), mg/m2/s.
    # Not used further below; kept for reference.
    Ag = (Am + Rdark) * (1 - np.exp((-epsilon * PAR) / (Am + Rdark))) - Rdark

    # --- Upscaling from leaf to canopy ---------------------------------------
    tempy = epsilon * Kx * PAR / (Am + Rdark)

    # 1. net flow of CO2 into the plant at canopy level (DALES formulation)
    E1_first  = E1(tempy * np.exp(-Kx * LAI))
    E1_second = E1(tempy)
    An_canopy = (Am + Rdark) * (1 - 1. / (Kx * LAI) * (E1_first - E1_second))

    # 2. CO2 conductance at canopy level
    a1    = 1.0 / (1 - f0)
    Dstar = VPDmax / (a1 * (f0 - fmin))

    fstr  = 1.  # 0: values at wilting point, 1: absence of moisture stress
    gcco2 = LAI * (gmin / nuco2q + a1 * fstr * An_canopy
                   / ((co2_abs - co2_comp) * (1. + VPD / Dstar)))  # m/s

    # 3. surface resistance for moisture and carbon dioxide
    rs    = 1. / (1.6 * gcco2)
    rsCO2 = 1. / gcco2  # surface resistance of CO2 in s/m

    # Aerodynamic resistance from the Loobos observations
    U      = df_EC_filter['Mea_Windsp']
    U_star = df_EC_filter['U-star']
    ra     = U / (U_star ** 2)

    # 4. net flux of CO2 into the plant (mg/m2/s); by default this formula
    # should carry a minus sign.
    An_final = (co2_abs - ci) / (ra + rsCO2)

    # Convert mg/m2/s to umol/m2/s (molar weight of CO2 is 44.01 g/mol)
    An_umol = (An_final / 44.01) * 1000

    return (An_final, An_umol, rs, ra)

In [None]:
# Run the A-gs model once; the four results are reused by the ET calculation and the plots below.
an_final,an_umol,rs, ra = runAgs()

In [None]:
# Quick look at both resistances for 2017-04-01 .. 2017-05-01.
# NOTE(review): `legend` is given a string here; presumably legend=True was meant.
rs.loc['2017-04-01':'2017-05-01'].plot(label='rs',legend='rs')
ra.loc['2017-04-01':'2017-05-01'].plot(label='ra',legend='ra')

## Calculate ET

### Assemble dataframe 'df_ET' that will hold output and fill with inputs

In [None]:
# Collect the input columns for the ET calculation side by side (aligned on the index).
df_ET = pd.concat([df_meteo['L(o)'],df_meteo['Te-L(o)'],df_profile['Pressure'],df_Comb['VPD'],df_Comb['rH'],df_meteo['P(mast)']],axis=1,sort=False)
#convert Pressure from hPa to kPa 
df_ET['p_kPa']=df_ET['Pressure']/10
# Rows with VPD <= 0 are left out of the selection, so they become NaN in 'VPD_adj'.
df_ET['VPD_adj']=df_ET['VPD'].loc[df_ET['VPD']>0] #some outlier values for VPD are negative, remove from dataset
df_ET['VPD_adj']=df_ET['VPD_adj']/10  # VPD from df_Comb is in hPa, I need kPa, so hPa/10 = kPa

### step 1) leaf temperature 'T_sfc'

In [None]:
# Correcting outgoing longwave: the sensor reports values between -20 and 10 because the
# blackbody emission of the sensor itself (which depends on the sensor temperature) is not
# included. We therefore add the longwave radiation emitted by the sensor to its output:
# R_L(out)_corrected = R_L(out)_measured + R_L(out)_sensor, where R_L(out)_sensor = sigma*T(sensor)^4

#constants:
sigma = 5.67e-8 # W/m2/K4, Stefan-boltzmann constant
# Emissivity of the surface. It must lie in 0.98-1.00; the previous value 1/0.98 (> 1)
# inverted the correction when used as L/(epsilon*sigma) below.
epsilon = 0.98
df_ET['L(o)_sensor'] = sigma*((df_ET['Te-L(o)']+273)**4)    #where Te-L(o) is in C
df_ET['L(o)_corr'] = df_ET['L(o)'] + df_ET['L(o)_sensor'] # where L(o)_corr is corrected Longwave out (corrected for sensor's own temp)
#df_ET['L(o)_corr'].plot() # varies from 300 to 500 Wm-2

#Formula for leaf temp is: R_L(out)_corrected = epsilon * sigma * T_sfc^4 (where epsilon = 0.98-1.00, sigma = 5.67e-8 W/m2/K4, T_sfc in K)
#rearrange formula to:
df_ET['T_sfc'] = (df_ET['L(o)_corr'] / (epsilon*sigma)) ** (1/4)  # T_sfc output in K)
df_ET['T_sfc_C'] = df_ET['T_sfc']-273
#check output:
df_ET['T_sfc_C'].plot(title="Leaf surface temp 'T_sfc_C' in Celcius")

### step 2) saturated vapor pressure 'e_sat'

#### calculating e_sat assuming T_sfc is in Kelvin

In [None]:
#calculating e_sat from T_sfc_C, note that T_sfc_C ranges from -150 C to -250 C (suspicious)
# NOTE(review): the range quoted above may be stale - confirm against the T_sfc_C plot.
#constants:
e_sat_0 = 0.6107 # e_sat_0 = 0.6107 kPa or 610.7 Pa
a = 7.5
b = 237.3 # oC (not a typo)
# e_sat = e_sat_0 * 10^(a*T / (b+T)), with T the surface temperature in degrees C
df_ET['e_sat'] = e_sat_0 * 10**(a*df_ET['T_sfc_C'] / (b+df_ET['T_sfc_C'])) #  T_sfc_C in oC

#formal clausius-Clapeyron (aka August-Roche-Magnus) from wikipedia: e_sat = e_sat_0 * 10^( 17.6*Temp / 243+ Temp)  where e_sat is in hPa and Temp is in K

p1=df_ET['e_sat'].plot(title="saturated vapor pressure 'e_sat' in kPa")
#p1.axhline(y=100,c='r')
#p1.axhline(y=0.40,c='r')

#### wikipedia formula

In [None]:
#USING WIKIPEDIA FORMULA INSTEAD OF ONE GIVEN BY MICHIEL
#calculating e_sat from T_sfc, note that T_sfc ranges from 0 K to (suspicious)
#constants:
#e_sat_0 = 0.6107 # e_sat_0 = 0.6107 kPa or 610.7 Pa
#a = 17.6
#b = 243 # oC (geen typo)
#df_ET['e_sat_wiki'] = e_sat_0 * 10**(a*df_ET['T_sfc'] / (b+df_ET['T_sfc'])) #  T_sfc in K

#formal clausius-Clapeyron (aka August-Roche-Magnus) from wikipedia: e_sat = e_sat_0 * 10^( 17.6*Temp / 243+ Temp)  where e_sat is in hPa and Temp is in K

#p1=df_ET['e_sat_wiki'].plot(title="saturated vapor pressure 'e_sat' in hPa (using Wikipedia formula) ")
#p1.axhline(y=100,c='r')
#p1.axhline(y=0.40,c='r')

### step 3) ET from VPD and Esat

In [None]:
# Convert the vapour-pressure deficit into a specific-humidity deficit (kg/kg) in two ways:
# from the dataset's VPD column and from its relative humidity column.
#VPD(in Pa) = e_sat - e_act
#VPD(in kg/kg) = q_sat - q_act

Rd = 287 # J/kg K
Rv = 462 # J/kg K
# e = vapour pressure # in Pa or kPa
# p = air pressure # in Pa or kPa

#Specific humidity q in kg/kg can be calculated from vapour pressure e via:
#q = Rd/Rv * e/p

#q_sat = Rd/Rv * e_sat/p
df_ET['q_sat'] = Rd/Rv * df_ET['e_sat']/df_ET['p_kPa']

#method 1 of calculating e_act: through VPD from dataset
#note: this is giving negative values so I'm removing it for now.
#VPD = e_sat - e_act -> e_act = e_sat - VPD
df_ET['e_act_fromVPD'] = df_ET['e_sat'] - df_ET['VPD_adj']

#q_act = Rd/Rv * e_act/p
df_ET['q_act_fromVPD'] = Rd/Rv * df_ET['e_act_fromVPD']/df_ET['p_kPa'] #adding this to check

#final step, subtract to get VPD for specific humidity
#VPD_q = q_sat-q_act
df_ET['VPDq_fromVPD']=df_ET['q_sat'] - df_ET['q_act_fromVPD'] #adding this to check if there's a substantial difference

#method 2 of calculating e_act: through Rel Humidity from dataset
# RH = e_act/e_sat *100 -> e_act = RH * e_sat /100
df_ET['e_act_fromRH'] = (df_ET['rH']/100)*df_ET['e_sat']

#q_act = Rd/Rv * e_act/p
df_ET['q_act_fromRH'] = Rd/Rv * df_ET['e_act_fromRH']/df_ET['p_kPa']

#final step, subtract to get VPD for specific humidity
#VPD_q = q_sat-q_act
df_ET['VPDq_fromRH']=df_ET['q_sat'] - df_ET['q_act_fromRH']

#df_ET['VPDq_fromVPD'].plot(title='VPD (calculated from VPD) in kg(vapor)/kg(air)')
df_ET['VPDq_fromRH'].plot(title='VPD (caldulated from RH) in kg(vapor)/kg(air)')

In [None]:
#plot the differences between the methods
#df_ET['e_act_diff']=df_ET['e_act_fromVPD']-df_ET['e_act_fromRH']
#df_ET['e_act_diff'].plot()
#df_ET['e_sat'].plot()

### step 4) ET in Wm-2 from VPD

In [None]:
#final step
#ET = rho * Lv * VPD/rs
#rho = 1.2 (approx value given by Michiel), Lv = 2260 kJ/kg (from google) Note: update to more accurate values when I can

# 'ET'      : RH-based humidity deficit, surface resistance only
# 'ET_VPD'  : VPD-based humidity deficit, surface resistance only
# 'ET_VPD2' : VPD-based humidity deficit, surface plus aerodynamic resistance
# rs and ra only exist for the filtered rows, so index alignment leaves NaN on all other rows.
df_ET['ET'] = 1.2 * 2260000 * (df_ET['VPDq_fromRH']/rs)
df_ET['ET_VPD'] = 1.2 * 2260000 * (df_ET['VPDq_fromVPD']/rs)
df_ET['ET_VPD2'] = 1.2 * 2260000 * (df_ET['VPDq_fromVPD']/(rs+ra))

In [None]:
# Visual check of the aerodynamic resistance over the whole period.
ra.plot()
#rs.plot()

## End of Calculating ET section

## Visualize results

### yearly overviews of CO2 and H2O fluxes

#### Water (ET and LE) yearly overview

In [None]:
#simulated ET
# One figure per year: 'ET_VPD2' for 1 April - 30 September, restricted to 11:00-18:00.
# ET_plotting_2017 / ET_plotting_2018 are reused by the comparison plots in the next cell.
#2017
fig, ax = plt.subplots()
ET_plotting_2017 = df_ET['ET_VPD2'].loc['2017-04-01':'2017-09-30'].between_time("11:00", "18:00")
ax.plot(ET_plotting_2017)
ax.set_ylabel('ET [W/m2]')
plt.suptitle('simulated ET in Wm-2 for 2017 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()
#2018
fig, ax = plt.subplots()
ET_plotting_2018 = df_ET['ET_VPD2'].loc['2018-04-01':'2018-09-30'].between_time("11:00", "18:00")
ax.plot(ET_plotting_2018)
ax.set_ylabel('ET [W/m2]')
plt.suptitle('simulated ET in Wm-2 for 2018 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()

In [None]:
#measured LE
# Only positive LE values are kept; same season and time-of-day window as the simulated ET plots.
# Requires ET_plotting_2017 / ET_plotting_2018 from the simulated-ET cell for the overlays at the end.
#2017
fig, ax = plt.subplots()
LE_plotting_2017 = df_Comb['LE'].loc[df_Comb['LE']>0].loc['2017-04-01':'2017-09-30'].between_time("11:00", "18:00")
ax.plot(LE_plotting_2017)
ax.set_ylabel('ET [W/m2]')
plt.suptitle('measured LE in Wm-2 for 2017 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()
#2018
fig, ax = plt.subplots()
LE_plotting_2018 = df_Comb['LE'].loc[df_Comb['LE']>0].loc['2018-04-01':'2018-09-30'].between_time("11:00", "18:00")
ax.plot(LE_plotting_2018)
ax.set_ylabel('ET [W/m2]')
plt.suptitle('measured LE in Wm-2 for 2018 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()
#overlay simulated and measured, 2017
fig,ax = plt.subplots()
ax.plot(ET_plotting_2017)
ax.plot(LE_plotting_2017,c='r')
fig.legend(['simulated','measured'])
plt.show()
#overlay simulated and measured, 2018
fig,ax = plt.subplots()
ax.plot(ET_plotting_2018)
ax.plot(LE_plotting_2018,c='r')
fig.legend(['simulated','measured'])
plt.show()

#### Carbon (An and GPP) yearly overview

In [None]:
#simulated An
# One figure per year: positive simulated assimilation (umol/m2/s), 1 April - 1 October, 11:00-18:00.
#2017
fig, ax = plt.subplots()
an_plotting_2017 = an_umol.loc[an_umol>0].loc['2017-04-01 00:00':'2017-10-01 00:00'].between_time("11:00", "18:00")
ax.plot(an_plotting_2017)
ax.set_ylabel('Assimilation [umol/m2/s]')
plt.suptitle('A-gs simulated Assimilation rate for 2017 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()
#2018
fig, ax = plt.subplots()
an_plotting_2018 = an_umol.loc[an_umol>0].loc['2018-04-01 00:00':'2018-10-01 00:00'].between_time("11:00", "18:00")
ax.plot(an_plotting_2018)
ax.set_ylabel('Assimilation [umol/m2/s]')
plt.suptitle('A-gs simulated Assimilation rate for 2018 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()

In [None]:
#measured GPP
# One figure per year: 'GPP_f' from df_Comb, 11:00-18:00.
# NOTE(review): these slices start on 5 April, whereas the simulated-An plots start on 1 April.
#2017
fig, ax = plt.subplots()
GPP_plotting_2017 = df_Comb['GPP_f'].loc['2017-04-05 00:00':'2017-10-01 00:00'].between_time("11:00", "18:00")
ax.plot(GPP_plotting_2017)
ax.set_ylabel('GPP [umol/m2/s]')
plt.suptitle('Measured GPP for 2017 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()
#2018
fig, ax = plt.subplots()
GPP_plotting_2018 = df_Comb['GPP_f'].loc['2018-04-05 00:00':'2018-10-01 00:00'].between_time("11:00", "18:00")
ax.plot(GPP_plotting_2018)
ax.set_ylabel('GPP [umol/m2/s]')
plt.suptitle('Measured GPP for 2018 growth season, between 11:00 and 18:00')
plt.grid()
fig.autofmt_xdate()
plt.show()

### Day-scale comparison measured and simulated GPP

In [None]:
#for the CO2 flux, presumably in mol/area/time, we take it from .....
#this can be split up in a respiration and assimilation flux. For assimilation we take it using night time representative values over time
#An_final is in mg/m2/s, therefore use an_umol
# Three figures for 14-16 April 2017: simulated An, measured GPP_f, and their difference.
#2017
fig, ax = plt.subplots()
an_plotting_apr2017 = an_umol.loc[an_umol>0].loc['2017-04-14 00:00':'2017-04-16 00:00']#.between_time("11:00", "18:00")
ax.plot(an_plotting_apr2017)
ax.set_ylabel('Assimilation [umol/m2/s]')
fig.autofmt_xdate()
fig.suptitle('A-gs simulated assimilation (GPP) for 14-16 april 2017')
plt.grid()
plt.show()

fig, ax = plt.subplots()
gpp_plotting_apr2017=df_Comb['GPP_f'].loc['2017-04-14 00:00':'2017-04-16 00:00']
ax.plot(gpp_plotting_apr2017)
ax.set_ylabel('GPP [umol/m2/s]')
#fig.autofmt_xdate()
fig.suptitle('EC-measured GPP_f for 14-16 april 2017')
plt.grid()
plt.show()

# The subtraction aligns on the timestamp index; timestamps present in only one series give NaN.
fig, ax = plt.subplots()
ax.plot(gpp_plotting_apr2017-an_plotting_apr2017)
ax.set_ylabel('GPP [umol/m2/s]')
#fig.autofmt_xdate()
fig.suptitle('EC-measured GPP_f minus A-gs simulated An for 14-16 april 2017')
plt.grid()
plt.show()


In [None]:
# Day-scale comparison of simulated ET and measured LE for 1-2 August 2018.
# The series get their own names so this cell no longer overwrites the April-2017
# assimilation/GPP series with LE data.
fig, ax = plt.subplots()
et_plotting_aug2018 = df_ET['ET'].loc['2018-08-01 00:00':'2018-08-03 00:00']
ax.plot(et_plotting_aug2018)
ax.set_ylabel('LE [W/m2]')
fig.autofmt_xdate()
fig.suptitle('Simulated ET for 1-2 august 2018')
plt.grid()
plt.show()

fig, ax = plt.subplots()
# Only positive measured LE values are kept.
le_plotting_aug2018 = df_Comb['LE'].loc[df_Comb['LE']>0].loc['2018-08-01':'2018-08-02']#.between_time("11:00", "18:00")
ax.plot(le_plotting_aug2018)
ax.set_ylabel('measured LE [W/m2]')
#fig.autofmt_xdate()
fig.suptitle('EC-measured LE for 1-2 august 2018')
plt.grid()
plt.show()

# Difference of two energy fluxes, so the unit is W/m2 (not umol/m2/s).
# The subtraction aligns on the timestamp index; unmatched timestamps give NaN.
fig, ax = plt.subplots()
ax.plot(le_plotting_aug2018 - et_plotting_aug2018)
ax.set_ylabel('LE [W/m2]')
#fig.autofmt_xdate()
fig.suptitle('EC-measured LE minus A-gs simulated ET for 1-2 august 2018')
plt.grid()
plt.show()

## Troubleshooting ET LE

In [None]:
# Troubleshooting: compare both simulated ET variants with measured LE for May 2017.
#2017
start,end = '2017-05-01','2017-05-30'

# Figure 1: ET simulated from the RH-based humidity deficit.
fig, ax = plt.subplots()
simRH_plotting = df_ET['ET'].loc[start:end]
measured_plotting = df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end]#.between_time("11:00", "18:00")
# Previously plotted the undefined name `an_plotting` (NameError on a fresh kernel).
ax.plot(simRH_plotting)
ax.set_ylabel('LE [W/m2]')
fig.autofmt_xdate()
fig.suptitle('from RH')
plt.grid()
plt.show()

# Figure 2: measured LE against ET simulated from the VPD-based humidity deficit.
fig, ax = plt.subplots()
simVPD_plotting = df_ET['ET_VPD'].loc[start:end]
measured_plotting=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end]#.between_time("11:00", "18:00")
ax.plot(measured_plotting)
ax.plot(simVPD_plotting,c='red') #for troubleshoot
ax.set_ylabel('LE [W/m2]')
fig.autofmt_xdate()
fig.suptitle('from VPD')
plt.grid()
fig.legend(['measured','simulated'])
plt.show()

# Figure 3: measured LE minus the RH-based simulation (index-aligned; unmatched timestamps give NaN).
# Previously used the undefined names `gpp_plotting` and `an_plotting`, and a umol/m2/s label
# for what is an energy flux in W/m2.
fig, ax = plt.subplots()
ax.plot(measured_plotting - simRH_plotting)
ax.set_ylabel('LE [W/m2]')
fig.autofmt_xdate()
fig.suptitle('EC-measured LE minus A-gs simulated ET')
plt.grid()
plt.show()

In [None]:
# Raw (uncorrected) outgoing longwave over the troubleshooting window; uses start/end from the cell above.
df_ET['L(o)'].loc[start:end].plot()

## Correlation plots

### H2O flux

In [None]:
# Build the paired sample for the H2O correlation: 3-hourly means for 1-30 May 2017,
# keeping only bins whose timestamp lies between 11:00 and 18:00.
sim_data=df_ET['ET'].loc['2017-05-01 00:00':'2017-05-30 00:00'].resample('3H').mean().between_time("11:00", "18:00")
sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc['2017-05-01 00:00':'2017-05-30 00:00'].resample('3H').mean().between_time("11:00", "18:00")
measured_data=measured_data.dropna()

df_tmp = pd.DataFrame()
df_tmp['sim_data']=sim_data

# Inner join on the timestamp index keeps only bins present in both series;
# the measured series arrives as column 'LE'.
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

In [None]:
# Ordinary least-squares fit of simulated ET (y) on measured LE (x).
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

In [None]:
# Scatter of simulated ET against measured LE with the fitted regression line.
# The sample built for this plot is restricted with between_time("11:00", "18:00"),
# so the title states that window rather than "all hours".
fig,ax = plt.subplots()
ax.scatter(df_tmp['LE'],df_tmp['sim_data']) #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r')
ax.set_xlabel('measured data LE (EC) [Wm-2]')
ax.set_ylabel('simulated data ET (A-gs) [Wm-2]')
fig.suptitle('correlation of simulated ET to measured LE. from 1st to 30th May 2017, (3hour mean, 11:00-18:00) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))

### CO2 flux

In [None]:
# Default legend position for all figures created after this cell.
plt.rcParams["legend.loc"]="center right"

In [None]:
# Build the paired sample for the CO2 correlation: 3-hourly means for 1-30 May 2017, all hours.
# Only positive simulated values are used.
sim_data=an_umol.loc[an_umol>0].loc['2017-05-01 00:00':'2017-05-30 00:00'].resample('3H').mean()#.between_time("11:00", "18:00")
sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
measured_data=df_Comb['GPP_f'].loc['2017-05-01 00:00':'2017-05-30 00:00'].resample('3H').mean()#.between_time("11:00", "18:00")
measured_data=measured_data.dropna()

df_tmp = pd.DataFrame()
df_tmp['sim_data']=sim_data

# Inner join on the timestamp index; the measured series arrives as column 'GPP_f'.
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

In [None]:
# Ordinary least-squares fit of simulated An (y) on measured GPP_f (x).
slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

In [None]:
# Scatter of simulated against measured assimilation, coloured by hour of day,
# with the fitted regression line (solid) and the 1:1 line (dashed).
fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['GPP_f'],df_tmp['sim_data'],c=df_tmp.index.hour,cmap='viridis') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
# Same limits on both axes so the 1:1 line runs along the diagonal.
ax.set_ylim(-5,40)
ax.set_xlim(-5,40)
ax.set_xlabel(r'measured data An (GPP_f) [$\mu molm^{-2}s^{-1}$]')
ax.set_ylabel(r'simulated data An (A-gs) [$\mu molm^{-2}s^{-1}$]')
fig.suptitle('Correlation of simulated to measured An, May 2017, (3hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Hour')

#### quick check R2 for different months

In [None]:
#check what the R2 value is month-wise for certain years
# Simulated An (positive values only) against measured GPP_f, 3-hourly means, one fit per month.
# NOTE(review): the '0{}' date formatting only yields valid dates while month+1 <= 9.
# NOTE(review): .loc[start:end] includes `end`, so the first timestamp of the next month is part of each window.

for year in [2017, 2018]:
    for month in [4,5,6,7,8]:
        
        start='{}-0{}-01 00:00'.format(year, month)
        end='{}-0{}-01 00:00'.format(year, month+1)
        
        sim_data=an_umol.loc[an_umol>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
        measured_data=df_Comb['GPP_f'].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        measured_data=measured_data.dropna()
        
        df_tmp = pd.DataFrame()
        df_tmp['sim_data']=sim_data
        
        # Inner join keeps only the 3-hour bins present in both series.
        df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)
        
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['GPP_f'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
        print('{}-0{}-01 -'.format(year, month),'{}-0{}-01'.format(year, month+1),' R2: {:.3f}'.format(r_value**2))


In [None]:
#check what the R2 value is month-wise for certain years
# RH-based simulated ET ('ET') against positive measured LE, 3-hourly means, one fit per month.
# NOTE(review): the '0{}' date formatting only yields valid dates while month+1 <= 9.

for year in [2017, 2018]:
    for month in [4,5,6,7,8]:
        
        start='{}-0{}-01 00:00'.format(year, month)
        end='{}-0{}-01 00:00'.format(year, month+1)

        sim_data=df_ET['ET'].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
        measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        measured_data=measured_data.dropna()
        
        df_tmp = pd.DataFrame()
        df_tmp['sim_data']=sim_data
        
        # Inner join keeps only the 3-hour bins present in both series.
        df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)
        
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
        print('{}-0{}-01 -'.format(year, month),'{}-0{}-01'.format(year, month+1),' R2: {:.3f}'.format(r_value**2))


In [None]:
#check what the R2 value is month-wise for certain years
# VPD-based simulated ET ('ET_VPD') against positive measured LE, 3-hourly means, one fit per month.
# NOTE(review): the '0{}' date formatting only yields valid dates while month+1 <= 9.

for year in [2017, 2018]:
    for month in [4,5,6,7,8]:
        
        start='{}-0{}-01 00:00'.format(year, month)
        end='{}-0{}-01 00:00'.format(year, month+1)

        sim_data=df_ET['ET_VPD'].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
        measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        measured_data=measured_data.dropna()
        
        df_tmp = pd.DataFrame()
        df_tmp['sim_data']=sim_data
        
        # Inner join keeps only the 3-hour bins present in both series.
        df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)
        
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['sim_data'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
        print('{}-0{}-01 -'.format(year, month),'{}-0{}-01'.format(year, month+1),' R2: {:.3f}'.format(r_value**2))


In [None]:
def p_dailysum(df):
    """Add a 'daily_prec' column with each row's calendar-day precipitation total.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a DatetimeIndex and a 'P(mast)' column.  The frame is
        modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``, with 'daily_prec' added or overwritten.
        Every row holds the sum of 'P(mast)' over all rows sharing its
        calendar day (NaN values are skipped in the sum).

    Raises
    ------
    KeyError
        If ``df`` has no 'P(mast)' column.
    """
    # Group rows by calendar day and broadcast each day's sum back onto its
    # rows; one vectorised pass instead of a per-row Python loop with a
    # string-keyed lookup.
    day = df.index.normalize()
    df['daily_prec'] = df['P(mast)'].groupby(day).transform('sum').astype(float)
    return df

In [None]:
def p_lasthours(df):
    """Add a 'daily_prec' column with each row's calendar-day precipitation total.

    NOTE(review): the name suggests a trailing-hours window, but this function
    has always computed the calendar-day sum of 'P(mast)'; that behaviour is
    kept unchanged here.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a DatetimeIndex and a 'P(mast)' column.  The frame is
        modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``, with 'daily_prec' added or overwritten.

    Raises
    ------
    KeyError
        If ``df`` has no 'P(mast)' column.
    """
    # Group rows by calendar day and broadcast each day's sum back onto its
    # rows; one vectorised pass instead of a per-row Python loop.
    day = df.index.normalize()
    df['daily_prec'] = df['P(mast)'].groupby(day).transform('sum').astype(float)
    return df

In [None]:
#check what the R2 value is month-wise for certain years
#USING THE NEW FUNCTION p_dailysum
# Same monthly fit as before, but for 'ET_VPD2' and only on rows whose daily precipitation sum is 0.

# p_dailysum modifies df_ET in place and returns it, so df_ET_1 is the same object as df_ET.
df_ET_1=p_dailysum(df_ET)

for year in [2017, 2018]:
    for month in [4,5,6,7,8]:
        
        start='{}-0{}-01 00:00'.format(year, month)
        end='{}-0{}-01 00:00'.format(year, month+1)

        sim_data = df_ET_1.loc[start:end,('ET_VPD2','daily_prec')]
        # Keep dry days only, then average to 3-hour bins.
        sim_data = sim_data.loc[sim_data['daily_prec']==0.0]
        sim_data= sim_data.resample('3H').mean()#.between_time("11:00", "18:00")
        sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
        measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
        measured_data=measured_data.dropna()
        
        #df_tmp = pd.DataFrame()
        df_tmp=sim_data.copy()
        df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)
        #print(df_tmp)
        slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD2'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
        print('{}-0{}-01 -'.format(year, month),'{}-0{}-01'.format(year, month+1),'Slope:{:.3f} R2: {:.3f}'.format(slope,r_value**2))


In [None]:
# Hour of day of every row, taken from the timestamp index.
df_ET_1['Hour']=df_ET_1.index.hour

In [None]:
#re-draw better correlation plot
# Simulated 'ET_VPD2' against positive measured LE on dry days, 3-hourly means.
# The window is 2017-04-01 .. 2017-05-01, so the figure title says April (it used to say May).
start='2017-04-01 00:00'
end='2017-05-01 00:00'
sim_data = df_ET_1.loc[start:end,['ET_VPD2','daily_prec']]
# Keep only rows on days whose precipitation sum is exactly 0.
sim_data = sim_data.loc[sim_data['daily_prec']==0.0]
sim_data= sim_data.resample('3H').mean()#.between_time("11:00", "18:00")
sim_data=sim_data.dropna() # drop 3-hour bins left empty by the filters above
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
measured_data=measured_data.dropna()

# Inner join on the timestamp index keeps only bins present in both series.
df_tmp=sim_data.copy()
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD2'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['LE'],df_tmp['ET_VPD2'],c=df_tmp.index.hour,cmap='viridis') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
# Same limits on both axes so the 1:1 line runs along the diagonal.
ax.set_ylim(-10,175)
ax.set_xlim(-10,175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle('Correlation of simulated to measured LE. April 2017, (3hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.set_title('(Only days on which daily  sum of Precip. is 0)')
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Hour')

In [None]:
#Work in progress, alternative function for eliminating data with precipitation but based on previous 3 hours instead of daily sum
#def p_resample(df,hours=3):
#    df[output]=NaNs
#    i=0
#    j=hours
#    for range(rows-hours) in df:
#        df[output][j]=df[i:j].sum()
#        i++1
#        j++1
#        
#    return df

### hourly averages

In [None]:
# Time series of the 3-hourly means for 2017-04-01 .. 2017-05-01:
# simulated 'ET_VPD2' on dry days and positive measured LE, drawn by two consecutive .plot() calls.
start='2017-04-01 00:00'
end='2017-05-01 00:00'
sim_data = df_ET_1.loc[start:end,('ET_VPD2','daily_prec')]
# Keep only rows on days whose precipitation sum is exactly 0.
sim_data = sim_data.loc[sim_data['daily_prec']==0.0]
sim_data= sim_data.resample('3H').mean()#.between_time("11:00", "18:00")
sim_data=sim_data.dropna() #eliminate NaN entries created by the .loc slice
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
measured_data=measured_data.dropna()

sim_data['ET_VPD2'].plot()
measured_data.plot()

In [None]:
#re-draw better correlation plot
# Simulated 'ET_VPD2' against positive measured LE on dry days, 3-hourly means.
# The window is 2017-04-01 .. 2017-05-01, so the figure title says April (it used to say May).
# NOTE(review): this cell repeats an earlier correlation cell line for line.
start='2017-04-01 00:00'
end='2017-05-01 00:00'
sim_data = df_ET_1.loc[start:end,['ET_VPD2','daily_prec']]
# Keep only rows on days whose precipitation sum is exactly 0.
sim_data = sim_data.loc[sim_data['daily_prec']==0.0]
sim_data= sim_data.resample('3H').mean()#.between_time("11:00", "18:00")
sim_data=sim_data.dropna() # drop 3-hour bins left empty by the filters above
measured_data=df_Comb['LE'].loc[df_Comb['LE']>0].loc[start:end].resample('3H').mean()#.between_time("11:00", "18:00")
measured_data=measured_data.dropna()

# Inner join on the timestamp index keeps only bins present in both series.
df_tmp=sim_data.copy()
df_tmp=df_tmp.merge(measured_data, how='inner',left_index=True, right_index=True)

slope, intercept, r_value, p_value, std_err = stats.linregress(df_tmp['LE'], df_tmp['ET_VPD2'])  #linregres x, y . note r_value is Pearson's coefficient. R^2 is r_value**2
print('R2: ',r_value**2)
print('slope, intercept:', slope, intercept)

fig,ax = plt.subplots()
p1=ax.scatter(df_tmp['LE'],df_tmp['ET_VPD2'],c=df_tmp.index.hour,cmap='viridis') #scatter(x,y)
ax.axline((0.0,intercept),slope=slope,c='r',label='slope')
ax.axline ((0.0,0.0), slope=1, c='r',linestyle='dashed',label='1:1')
# Same limits on both axes so the 1:1 line runs along the diagonal.
ax.set_ylim(-10,175)
ax.set_xlim(-10,175)
ax.set_xlabel(r'measured data LE (EC) [Wm$^{-2}$]')
ax.set_ylabel(r'simulated data LE (A-gs) [Wm$^{-2}$]')
fig.suptitle('Correlation of simulated to measured LE. April 2017, (3hour mean, 00:00-23:59) \n intercept = {:.3f}, slope = {:.3f}, R2 = {:.3f}'.format(intercept,slope,r_value**2))
ax.set_title('(Only days on which daily  sum of Precip. is 0)')
ax.legend(loc='upper left')
cbar=plt.colorbar(p1,ax=ax)
cbar.ax.set_ylabel('Hour')

In [None]:
# List the available soil columns (displayed as the cell output).
df_soil.columns

In [None]:
# Plot five 'SM-*' soil columns for 2017 with consecutive .plot() calls.
# NOTE(review): presumably soil moisture in the litter layer and at several depths - confirm against the data description.
# NOTE(review): the slice ends on 30 December, so 31 December is not shown.
df_soil['SM-Lit'].loc['2017-01-01':'2017-12-30'].plot()
df_soil['SM-003'].loc['2017-01-01':'2017-12-30'].plot()
df_soil['SM-020'].loc['2017-01-01':'2017-12-30'].plot()
df_soil['SM-050'].loc['2017-01-01':'2017-12-30'].plot()
df_soil['SM-100'].loc['2017-01-01':'2017-12-30'].plot()