# NEON Paper Plots (Light Use Efficiency Curve) 
## Figure 7: LUE boxplots 
  + Figure 7 Alternatives
##### Author : Negin Sobhani negins@ucar.edu [@negin513](https://github.com/negin513)

##### Last revised: 2023-01-13
_______

This notebook contains the scripts for:

1. Creating light use efficiency curves using specified bins
2. Filtering light use efficiency curves. 

**TODO:** This script still fails for sites with incomplete data records. Additional modifications are needed to handle these exceptions.

In [1]:
import os
import time
import datetime

import numpy as np
import pandas as pd
import xarray as xr

from glob import glob
from os.path import join

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)


import calendar

import tqdm
import cftime
from neon_utils import download_eval_files

import seaborn as sns ## -- seaborn only needed for boxplots:


In [2]:
# Record the xarray version in use (this notebook was previously working with 0.20.0).
print(f"xarray {xr.__version__}")

xarray 2022.6.0


## Dask Cluster
The following spins up a Dask cluster; this only works on NCAR machines.
16 workers worked best for NEON.

In [3]:
# Dask set-up: build a cluster through ncar_jobqueue and attach a client to it.
from distributed import Client
from ncar_jobqueue import NCARCluster

# Create the cluster object and ask it to scale to 16
# (the accompanying notebook text notes that 16 workers worked best for NEON).
cluster = NCARCluster()
cluster.scale(16)
# Connect a distributed client to that cluster.
client = Client(cluster)
# Bare expression: shows the client summary as this cell's output.
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41288 instead


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/wwieder/proxy/41288/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/wwieder/proxy/41288/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.49:46257,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/wwieder/proxy/41288/status,Total threads: 0
Started: Just now,Total memory: 0 B


---------------------------
## Make Light Use Efficiency Curve Boxplots

In [10]:
# -- Load the NEON site table and pull out the site identifiers.
# (earlier location: /glade/scratch/negins/preprocessed_neon_csv_data/neon_sites_dompft.csv)
neon_sites_pft = pd.read_csv(
    '/glade/u/home/wwieder/CTSM/tools/site_and_regional/neon_sites_dompft.csv'
)
neon_sites = neon_sites_pft['Site'].tolist()

# -- Sites whose plots could not be produced are collected here.
failed_sites = list()

In [5]:
def preprocess(ds):
    """Return only the 'GPP' and 'ELAI' variables of ``ds``.

    Intended as the ``preprocess`` callback when opening many netCDF files,
    so that only these variables are kept from each file.
    """
    return ds[['GPP', 'ELAI']]

In [6]:
# Root directory under which every figure produced by this notebook is written.
out_dir = "/glade/work/wwieder/neon_vis_plots/paper_plots_final/"

if not os.path.exists (out_dir):
    print ("plot directory does not exist... creating it now!")
    # makedirs (rather than mkdir) also creates any missing parent directory,
    # and exist_ok avoids an error if the directory appears in the meantime.
    os.makedirs(out_dir, exist_ok=True)

In [7]:
## -- Figure defaults: bold text everywhere, base font size 15.
plt.rcParams.update({
    "font.weight": "bold",
    "axes.labelweight": "bold",
})
font = dict(weight='bold', size=15)
matplotlib.rc('font', **font)

In [11]:
#neon_sites = ['BART'] # for testing
# Imported once here instead of on every pass through the site loop.
from scipy import stats

# ---------------------------------------------------------------------------
# Configuration for this cell
# ---------------------------------------------------------------------------
years = ["2018","2019","2020","2021"]
months = ["07","08"]          # months whose files are read
JULY = 7                      # only this month is kept for the plots
save_switch = True            # True: write figures to disk, False: plt.show()

# Start from an empty list so that re-running this cell does not accumulate
# duplicate entries from a previous run.
failed_sites = []

# Input locations (one sub-directory / file prefix per NEON site).
SIM_ROOT = "/glade/campaign/cgd/tss/wwieder/NEON_v2/hist/"
EVAL_ROOT = "/glade/campaign/cgd/tss/wwieder/NEON_v2/eval/"
ATM_ROOT = "/glade/campaign/cgd/tss/wwieder/NEON_v2/datm"

# Shortwave bins [W m-2]: edges, and the x position used for each bin in the
# "uniform" box plot.  The last bin spans 400-1000 but is drawn at 450.
FSDS_BIN_EDGES = [0, 25., 50, 100, 200, 300, 400,1000]
FSDS_BIN_POSITIONS = [12.5,37.5, 75., 150, 250, 350, 450]

# Colours of the two series in the "uniform" box plot.
NEON_COLOR = '#e28743'
CTSM_COLOR = '#1d657e'

# Axis labels shared by all figures.
XLABEL = 'Incident Shortwave [Wm⁻²]'
plot_var_desc = "Gross Primary Production"
plot_var_unit= "gC m⁻²day⁻¹"
GPP_LABEL = plot_var_desc +" ["+plot_var_unit+"]"


def _find_monthly_files(directory, stem, tail, years, months):
    """Return the files matching ``<stem><year>-<month><tail>`` in ``directory``.

    Patterns are globbed year by year and month by month; the matches of each
    pattern are sorted before being appended.
    """
    found = []
    for year in years:
        for month in months:
            found.extend(sorted(glob(join(directory, stem+year+"-"+month+tail))))
    return found


def _load_site_dataframe(neon_site, years, months):
    """Read simulation, observation and forcing data of one site into a DataFrame.

    Returns a frame with columns ``time`` and ``GPP`` (from the CTSM history
    files), ``FSDS`` (from the forcing files) and ``obs_GPP`` (``GPP`` of the
    evaluation files).  The three sources are combined purely by position, so
    they must have the same number of time steps; a length mismatch raises.

    Every dataset opened here is closed again before returning (the values
    are already held as NumPy arrays in the frame at that point).
    """
    opened = []
    try:
        sim_path = SIM_ROOT+neon_site+".transient/lnd/hist/"
        sim_files = _find_monthly_files(sim_path, neon_site+".transient.clm2.h1.", "*.nc", years, months)
        print("All Simulation files for all years: [", len(sim_files), "files]")

        start = time.time()
        ds_ctsm = xr.open_mfdataset(sim_files, decode_times=True, combine='by_coords',parallel=True, preprocess=preprocess)
        opened.append(ds_ctsm)
        print("Reading all simulation files took:", time.time()-start, "s.")

        eval_path = os.path.join(EVAL_ROOT, neon_site)
        eval_files = _find_monthly_files(eval_path, neon_site+"_eval_", ".nc", years, months)

        start = time.time()
        ds_eval = xr.open_mfdataset(eval_files, decode_times=True, combine='by_coords')
        opened.append(ds_eval)
        print("Reading all observation files took:", time.time()-start, "s.")

        # datm files downloaded directly (not the ones in the run directories)
        atm_path = os.path.join(ATM_ROOT, neon_site)
        atm_files = _find_monthly_files(atm_path, neon_site+"_atm_", ".nc", years, months)

        start = time.time()
        ds_atm = xr.open_mfdataset(atm_files, decode_times=True, combine='by_coords')
        opened.append(ds_atm)
        print("Reading all forcing files took:", time.time()-start, "s.")

        # Convert to a pandas DataFrame for easier handling.
        df_site = pd.DataFrame({'time':ds_ctsm.time})
        df_site['GPP'] = np.ravel(ds_ctsm['GPP'])
        df_site['FSDS'] = np.ravel(ds_atm['FSDS'])
        df_site['obs_GPP'] = np.ravel(ds_eval['GPP'])
    finally:
        # Release the underlying files even when reading fails part-way.
        for ds in opened:
            ds.close()
    return df_site


def _prepare_july_frame(df_raw):
    """Add calendar columns, convert units, filter and bin the site data.

    Steps:
      * derive ``year``/``month``/``day``/``hour``/``season``/``Month`` from ``time``;
      * scale ``obs_GPP`` by 12.01/1e6 (umol m-2 s-1 -> gC m-2 s-1, as stated in
        the original notebook) and then both GPP columns by 86400 (per second ->
        per day); CTSM ``GPP`` is assumed to already be in gC m-2 s-1 -- TODO confirm;
      * keep only rows with ``obs_GPP``, ``GPP`` and ``FSDS`` all > 0 in July;
      * add ``bins_FSDS`` (interval per row) and ``bin`` (plot position per row).

    The input frame is not modified; a new frame is returned.
    """
    df = df_raw.copy()

    #-- extract year, month, day, hour information from time
    df['year'] = df['time'].dt.year
    df['month'] = df['time'].dt.month
    df['day'] = df['time'].dt.day
    df['hour'] = df['time'].dt.hour
    df['season'] = ((df['month']%12+3)//3).map({1:'DJF', 2: 'MAM', 3:'JJA', 4:'SON'})

    # convert observed GPP from umol m-2 s-1 to gC m-2 s-1
    df['obs_GPP'] = df['obs_GPP']*(12.01/1000000)

    # convert to per day instead of per second
    df['GPP'] = df['GPP']*60*60*24
    df['obs_GPP'] = df['obs_GPP']*60*60*24

    df['Month'] = df['month'].apply(lambda x: calendar.month_abbr[x])

    # -- keep positive GPP / obs_GPP / FSDS values, July only
    keep = (df["obs_GPP"]>0) & (df["GPP"]>0) & (df["FSDS"]>0) & (df["month"]==JULY)
    # .copy(): the columns added below must not be written into a slice view.
    df = df[keep].copy()

    df['bins_FSDS'] = pd.cut(df['FSDS'], FSDS_BIN_EDGES)
    df['bin'] = pd.cut(df['FSDS'], FSDS_BIN_EDGES, labels=FSDS_BIN_POSITIONS)
    return df


def _save_or_show(fig, save, targets):
    """Write ``fig`` to every target, or call ``plt.show()`` when ``save`` is false.

    ``targets`` is a sequence of ``(folder, fmt, file_name, dpi)``; each file
    goes to ``out_dir/<folder>/<fmt>/<file_name>``.
    """
    if not save:
        plt.show()
        return
    for folder, fmt, file_name, dpi in targets:
        plot_dir = os.path.join(out_dir, folder, fmt)
        os.makedirs(plot_dir, exist_ok=True)
        target = os.path.join(plot_dir, file_name)
        print ('Saving '+ target)
        fig.savefig(target, dpi=dpi, bbox_inches='tight', format=fmt)


def _new_two_panel_figure():
    """Create the 1x2 figure used by both scatter plots; return (fig, axes)."""
    fig = plt.figure(num=None, figsize=(15, 7.5),  facecolor='w', edgecolor='k')
    return fig, fig.subplots(nrows=1, ncols=2).ravel()


def _finish_two_panel_figure(fig, axes, neon_site, ylim):
    """Apply the common axis styling and title of the two-panel scatter figures."""
    for idx, ax in enumerate(axes):
        ax.set_xlabel(XLABEL, fontsize=17)
        ax.set_ylim(*ylim)
        ax.tick_params(right= True,left= True, bottom= True)
        if idx == 0:
            ax.set_ylabel(GPP_LABEL, fontsize=17)
        else:
            # Right-hand panel: no y label / tick labels, the panels touch.
            ax.set_ylabel('')
            ax.set_yticklabels([])
        ax.tick_params(axis='y',which = 'both', direction="in")

    fig.subplots_adjust(wspace=0, hspace=0)
    fig.suptitle('Neon site : '+ neon_site +'\n 2018-2021 July (Filtered)', fontweight='bold',fontsize=17)


def _plot_scatter(df_all, neon_site):
    """Scatter of GPP against FSDS: CLM on the left panel, NEON on the right."""
    fig, axes = _new_two_panel_figure()
    sns.regplot(data=df_all, x="FSDS", y="GPP",  ax=axes[0],label='CLM',fit_reg=False,scatter_kws={"s": 80})
    sns.regplot(data=df_all, x="FSDS", y="obs_GPP", ax=axes[1], color='orange',label='NEON',fit_reg=False,scatter_kws={"s": 80})
    _finish_two_panel_figure(fig, axes, neon_site, ylim=(-3, 50))
    return fig


def _binned_log_regplot(df_all, y_column, ax, series_name, **regplot_kw):
    """Draw a binned scatter plus a y ~ log(x) fit and label the line with its equation.

    The coefficients shown in the legend come from ``stats.linregress`` on
    ``log(FSDS)``; the drawn line is seaborn's own ``logx=True`` fit.
    """
    slope, intercept = stats.linregress(np.log(df_all['FSDS']), df_all[y_column])[:2]
    sns.regplot(data=df_all, x="FSDS", y=y_column, ax=ax, label=series_name,
                scatter_kws={"s": 80}, logx=True, x_bins=7, truncate=True,
                line_kws={'label': series_name+' : $y=%3.5s + %3.5s.log(x)$'%(intercept, slope)},
                ci =99, x_ci= 99, **regplot_kw)


def _plot_scatter_with_log_fit(df_all, neon_site):
    """Binned scatter with fitted logarithmic curve: CLM left, NEON right."""
    fig, axes = _new_two_panel_figure()
    _binned_log_regplot(df_all, 'GPP', axes[0], 'CLM')
    _binned_log_regplot(df_all, 'obs_GPP', axes[1], 'NEON', color='orange')

    # One legend, holding the entries of both panels, placed on the last panel.
    handles, labels = [], []
    for ax in axes:
        panel_handles, panel_labels = ax.get_legend_handles_labels()
        handles += panel_handles
        labels += panel_labels

    _finish_two_panel_figure(fig, axes, neon_site, ylim=(-2, 30))
    axes[-1].legend(handles, labels)
    return fig


def _pandas_box_style(face_color):
    """Return a fresh set of styling keyword arguments for ``DataFrame.boxplot``.

    A new dict is built on every call so that the two box-plot calls never
    share (possibly modified) property dictionaries.
    """
    return dict(
        widths = 11,
        patch_artist=True,
        notch=False,
        showcaps=True,
        showmeans=True,
        showfliers=False,
        color=dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray"),
        boxprops=dict(linestyle='-', linewidth=1.5,facecolor=face_color, color="k"),
        flierprops=dict(linestyle='-', linewidth=1.5,marker='.'),
        medianprops=dict(linestyle='-', linewidth=1.5,color="k"),
        whiskerprops=dict(linestyle='-', linewidth=1.5),
        capprops=dict(linestyle='-', linewidth=1.5),
        meanprops=dict(marker='o', markeredgecolor='black',markerfacecolor='white',markersize=5.5),
        grid=False,
    )


def _plot_uniform_boxplots(df_all, neon_site):
    """Box plots of NEON and CTSM GPP per FSDS bin, drawn on a linear FSDS axis.

    Each bin gets two boxes side by side: NEON 6 units left of the bin
    position, CTSM 6 units right of it.
    """
    fig = plt.figure(num=None, figsize=(11, 5.9),  facecolor='w', edgecolor='k')
    axe = fig.gca()

    neon_positions = [p-6 for p in FSDS_BIN_POSITIONS]
    ctsm_positions = [p+12 for p in neon_positions]

    df_all.boxplot(ax=axe, column = 'obs_GPP',by='bin', positions=neon_positions, **_pandas_box_style(NEON_COLOR))
    df_all.boxplot(ax=axe, column = 'GPP',by='bin', positions=ctsm_positions, **_pandas_box_style(CTSM_COLOR))

    axe.set_xlim([-3,500])

    # Ticks at the lower edge of every bin.
    bin_starts = [0,25,50,100,200,300,400,]
    axe.set_xticks(bin_starts)
    axe.set_xticklabels(bin_starts)

    axe.grid(color='gray', linestyle='--', linewidth=0.5)
    axe.yaxis.grid(False, which='major')
    axe.set_ylabel(GPP_LABEL, fontsize=13)
    axe.set_xlabel(XLABEL, fontsize=13)
    fig.suptitle('')   # blank out the figure-level title left by the grouped box plot
    axe.set_title('Neon site : '+ neon_site +'\n 2018-2021 July', fontweight='bold',fontsize=17)

    neon_patch = mpatches.Patch(color=NEON_COLOR, label='NEON')
    ctsm_patch = mpatches.Patch(color=CTSM_COLOR, label='CTSM')
    axe.legend(handles=[neon_patch, ctsm_patch])
    return fig


def _plot_nonuniform_boxplots(df_all, neon_site):
    """Box plots of NEON and CTSM GPP per FSDS bin, one equally wide slot per bin."""
    fig = plt.figure(num=None, figsize=(9, 5.9),  facecolor='w', edgecolor='k')
    axe = fig.gca()

    df_plot = df_all.melt(id_vars='bins_FSDS', value_vars=['obs_GPP', 'GPP'])
    my_pal = {'obs_GPP': "darkorange", 'GPP': "steelblue"}

    sns.boxplot(x='bins_FSDS', y='value', hue='variable', data=df_plot, ax=axe,
                width =0.513,linewidth=3,showfliers = False,palette=my_pal,
                showmeans=True,
                meanprops={"marker":"o",
                           "markerfacecolor":"white",
                           "markeredgecolor":"black",
                           "markersize":"10"})

    axe.set(xlabel=XLABEL)
    axe.set(ylabel = GPP_LABEL)

    # Move the ticks half a slot to the left, i.e. onto the slot boundaries.
    n_ticks = len(axe.get_xticks())
    axe.set_xticks(np.arange(-0.5, n_ticks-0.5, 1))

    # The last bin covers FSDS above 400, hence '>400'.
    # Leading blanks position the (left-aligned) text inside each slot.
    new_labels = ['        0-25','      25-50','      50-100','    100-200','   200-300', '    300-400','    >400']
    axe.set_xticklabels(new_labels)
    for tick in axe.xaxis.get_majorticklabels():
        tick.set_horizontalalignment("left")

    # Keep the artists of the automatic legend, but give them readable names
    # (hue order follows value_vars above: obs_GPP first, then GPP).
    handles, _ = axe.get_legend_handles_labels()
    axe.legend(handles, ["NEON", "CTSM"], loc="best")

    axe.set_title('Neon site : '+ neon_site +'\n 2018-2021 July', fontweight='bold',fontsize=17)
    return fig


# ---------------------------------------------------------------------------
# Main loop: four figures per site; a failing site is recorded and skipped.
# ---------------------------------------------------------------------------
for neon_site in neon_sites:
    try:
        start_site = time.time()
        print ('---------------------------')
        print ("Making plots for "+neon_site)

        df_all = _prepare_july_frame(_load_site_dataframe(neon_site, years, months))

        _save_or_show(_plot_scatter(df_all, neon_site), save_switch, [
            ('light_use_curve_scatter', 'png', neon_site+'_'+'light_use_curve_scatter.png', 600),
        ])

        _save_or_show(_plot_scatter_with_log_fit(df_all, neon_site), save_switch, [
            ('light_use_curve_scatter_fitted_log_line', 'png', neon_site+'_'+'light_use_curve_scatter_fitted_log_line.png', 300),
        ])

        # PNG and PDF of this figure now share the same file stem
        # (the PNG name used to contain a doubled underscore).
        _save_or_show(_plot_uniform_boxplots(df_all, neon_site), save_switch, [
            ('light_use_boxplots_bins_uniform', 'png', neon_site+'_'+'light_use_curve'+'_'+'bins_boxplots_uniform.png', 600),
            ('light_use_boxplots_bins_uniform', 'pdf', neon_site+'_'+'light_use_curve'+'_'+'bins_boxplots_uniform.pdf', 600),
        ])

        _save_or_show(_plot_nonuniform_boxplots(df_all, neon_site), save_switch, [
            ('light_use_boxplots_bins_nonuniform', 'png', neon_site+'_light_use_boxplots_bins_nonuniform.png', 300),
        ])

        end_site = time.time()
        print("Making these plots for "+neon_site+" took : ", end_site-start_site, "s.")

    except Exception as e:
        # Best effort: report the problem, remember the site, carry on.
        print (e)
        print ('THIS SITE FAILED:', neon_site)
        failed_sites.append(neon_site)

    # Free all figures of this site before moving on to the next one.
    plt.close('all')
print ("Making plots for ", len(failed_sites), "sites failed : ")
print (*failed_sites, sep=" \n")

---------------------------
Making plots for BART
All Simulation files for all years: [ 248 files]
Reading all simulation files took: 1.1563217639923096 s.
Reading all observation files took: 0.2095813751220703 s.
Reading all forcing files took: 0.13255906105041504 s.
Saving /glade/work/wwieder/neon_vis_plots/paper_plots_final/light_use_curve_scatter/png/BART_light_use_curve_scatter.png
Saving /glade/work/wwieder/neon_vis_plots/paper_plots_final/light_use_curve_scatter_fitted_log_line/png/BART_light_use_curve_scatter_fitted_log_line.png
Saving /glade/work/wwieder/neon_vis_plots/paper_plots_final/light_use_boxplots_bins_uniform/png/BART_light_use_curve__bins_boxplots_uniform.png
Saving /glade/work/wwieder/neon_vis_plots/paper_plots_final/light_use_boxplots_bins_uniform/pdf/BART_light_use_curve_bins_boxplots_uniform.pdf
Saving /glade/work/wwieder/neon_vis_plots/paper_plots_final/light_use_boxplots_bins_nonuniform/png/BART_light_use_boxplots_bins_nonuniform.png
Making these plots for BART