# 2) Generate Trait Statistics #

This notebook takes the parameter file used to define the 500 model runs and produces trait statistics at every grid cell.<br>
<br>
Inputs: <br>
Metrics.nc - generated from notebook 1 - contains Cveg used for weighting traits<br>
wave1_params.nc - param file detailing the scaler (0-1) used to set traits in each of the 500 model runs<br>
PFTperturbation-info.csv - information on the perturbation ranges for each trait<br>
PFTparameters-default.csv - trait default values<br>
PFTparameters-min.csv - min of trait perturbation range<br>
PFTparameters-max.csv - max of trait perturbation range<br>

Outputs:<br>
Traits.nc - contains trait parameter values and aggregated grid cell mean, sd, and cv of traits

## Load in Packages ##

In [1]:
import numpy as np
import xarray as xr
import matplotlib
import matplotlib.pyplot as plt
import os
import cartopy.crs as ccrs
import matplotlib.gridspec as gridspec
import pandas as pd
import importlib.util

# Base directory for every input/output path built in this notebook
# (repo_dir + '/input/...' and repo_dir + '/params/...'). It is taken from the
# kernel's current working directory, so the notebook has to be started from
# the directory that contains the `input` and `params` folders.
repo_dir = os.getcwd()

## Set up functions ##

In [4]:
# Plant functional type (PFT) labels, stored as an object array.
pftnames = np.array(
    [
        # bare ground
        'not_vegetated',
        # needleleaf trees
        'needleleaf_evergreen_temperate_tree',
        'needleleaf_evergreen_boreal_tree',
        'needleleaf_deciduous_boreal_tree',
        # broadleaf trees
        'broadleaf_evergreen_tropical_tree',
        'broadleaf_evergreen_temperate_tree',
        'broadleaf_deciduous_tropical_tree',
        'broadleaf_deciduous_temperate_tree',
        'broadleaf_deciduous_boreal_tree',
        # shrubs
        'broadleaf_evergreen_shrub',
        'broadleaf_deciduous_temperate_shrub',
        'broadleaf_deciduous_boreal_shrub',
        # grasses
        'c3_arctic_grass',
        'c3_non-arctic_grass',
        'c4_grass',
        # crops
        'c3_crop',
    ],
    dtype=object,
)

# Names of the PFT-level traits handled by this notebook (alphabetical order).
pft_traits = [
    'froot_leaf', 'kmax', 'krmax', 'leaf_long',
    'leafcn', 'lmr_intercept_atkin', 'medlynintercept', 'medlynslope',
    'psi50', 'slatop', 'stem_leaf', 'theta_cj',
]

## Load in data ##

In [5]:
def _read_pft_table(csv_path):
    """Read a PFTparameters-*.csv file and return it as an xarray Dataset.

    The CSV is transposed, the first transposed row (the values of the CSV's
    first column) becomes the column labels, and transposed rows 1..16 are
    kept. The resulting Dataset has one data variable per column label along
    a dimension called 'index'.
    """
    table = pd.read_csv(csv_path).transpose()
    table.columns = table.iloc[0]
    return xr.Dataset.from_dataframe(table[1:17])


# Model metrics written by notebook 1.
Metrics = xr.open_dataset(repo_dir+'/input/Metrics.nc')

# Ensemble parameter file and the names of the parameters it contains.
trait_data = xr.open_dataset(repo_dir+'/params/wave1_params.nc')
traits = trait_data['parameter'].values

# Perturbation-range table, restricted to parameters present in the ensemble.
trait_info = pd.read_csv(repo_dir+'/params/PFTperturbation-info.csv')
trait_info = trait_info[trait_info['name'].isin(traits)]

# Default trait values.
# NOTE(review): the extra isel(index=slice(1, 18)) drops the first of the 16
# rows kept by the helper, so trait_default has one entry fewer along 'pft'
# than trait_mins/trait_maxs have along 'index' - confirm this is intended.
trait_default = (
    _read_pft_table(repo_dir+'/params/PFTparameters-default.csv')
    .isel(index = slice(1,18))
    .rename({'index':'pft'})
)

# Lower and upper ends of the perturbation range (dimension still named 'index').
trait_mins = _read_pft_table(repo_dir+'/params/PFTparameters-min.csv')
trait_maxs = _read_pft_table(repo_dir+'/params/PFTparameters-max.csv')

## Calculate Ecosystem Scale Grid Cell Means and Coefficients of Variation ##

In [6]:
# Turn the stacked 'wave1_params' array into one data variable per parameter:
# select each parameter, drop the now-scalar 'parameter' coordinate and rename
# the variable after the parameter it holds.
trait_list = [
    (
        trait_data
        .sel(parameter = param_name)
        .reset_coords('parameter', drop = True)
        .rename({'wave1_params': param_name})
    )
    for param_name in trait_data['parameter'].values
]
traitds = xr.merge(trait_list)

# PFT-level traits: every variable of trait_maxs from 'froot_leaf' up to and
# including 'theta_cj', in the order they appear in that dataset.
maxs_variable_names = list(trait_maxs.variables)
first_trait = maxs_variable_names.index('froot_leaf')
last_trait = maxs_variable_names.index('theta_cj')
pft_traits = maxs_variable_names[first_trait:last_trait + 1]

# Raw ensemble values for those traits, labelled with ensemble members 1..500
# and given a '_Norm' suffix.
traits_norm = (
    traitds[pft_traits]
    .assign_coords({'ens': range(1,501)})
    .rename({name: name + '_Norm' for name in pft_traits})
)

def _percent_value(entry):
    """Return the number written before 'percent' in ``entry``.

    Handles any number of digits and optional whitespace, e.g.
    '5percent' -> 5.0, '20 percent' -> 20.0, '100percent' -> 100.0.
    Raises ValueError when the leading text is not a number.
    """
    return float(str(entry).partition('percent')[0].strip())


def _perturbation_bounds(row):
    """Return ``(min, max)`` for one row of the perturbation table.

    * If both 'min' and 'max' are numeric they are returned as floats.
    * If 'min' is given as a percentage, the bounds are computed relative to
      the row's 'CLM5 Default Value(s)':
      ``(1 - min%/100) * default`` and ``(1 + max%/100) * default``.
    * Anything else (non-numeric ranges, non-numeric defaults) yields
      ``(nan, nan)``.

    Both values are parsed before either is returned, so a row can never
    contribute a min without a matching max.
    """
    missing = (float('nan'), float('nan'))

    try:
        return float(row['min']), float(row['max'])
    except (TypeError, ValueError):
        pass

    if 'percent' not in str(row['min']):
        return missing

    try:
        percent_val_min = _percent_value(row['min'])
        percent_val_max = _percent_value(row['max'])
        default_val = float(row['CLM5 Default Value(s)'])
    except (TypeError, ValueError):
        return missing

    min_val = (1 - percent_val_min/100) * default_val
    max_val = (1 + percent_val_max/100) * default_val
    return min_val, max_val


bounds = [_perturbation_bounds(row) for _, row in trait_info.iterrows()]
mins = [lower for lower, _ in bounds]
maxs = [upper for _, upper in bounds]

# assign() returns a new frame, so no column is written into a filtered view
# of the original table (avoids pandas' SettingWithCopyWarning).
trait_info = trait_info.assign(min_adjusted=mins, max_adjusted=maxs)

# Keep only parameters with a usable numeric range.
trait_info = trait_info.dropna(subset=['min_adjusted'])
trait_info = trait_info[['name','min_adjusted','max_adjusted']]
# medlynintercept is excluded from this table.
# NOTE(review): presumably because it is scaled per PFT elsewhere in this
# notebook - confirm.
trait_info = trait_info[trait_info['name'] != 'medlynintercept']

# Map every PFT-level trait from its scaling factor onto the physical range
# [min, max] of each PFT:  value = min + scale * (max - min).
pft_coord = trait_mins.rename({'index':'pft'}).pft
ens_coord = np.arange(1, 501)

trait_values_scaled = xr.Dataset()
for trait in trait_maxs.data_vars:
    lower = trait_mins[trait].astype('double')
    upper = trait_maxs[trait].astype('double')
    span = upper.rename({'index':'pft'}) - lower.rename({'index':'pft'})

    # Scaling factors of this trait for every ensemble member and PFT.
    scale_factors = trait_data.sel(parameter = trait).wave1_params

    # The lower bound is added as a plain array, i.e. positionally rather
    # than aligned by coordinate label.
    scaled_values = np.asarray(lower) + scale_factors * span

    trait_values_scaled[trait] = xr.DataArray(
        scaled_values,
        dims=('ens', 'pft'),
        coords={'ens': ens_coord, 'pft': pft_coord},
    )

    print(f'finished {trait}')


trait_values_scaled = xr.merge([trait_values_scaled])

# Parameters whose range comes from the perturbation table (one min/max per
# parameter rather than per PFT). For each one, the scaling factor of every
# ensemble member is averaged over PFTs and mapped onto [min_adjusted,
# max_adjusted].
ens_members = np.arange(0, 500)  # same 500 members (0..499) the ensemble is indexed by

universal_params = []
for index, row in trait_info.iterrows():
    trait = row['name']
    # One vectorised selection per trait instead of 500 scalar look-ups.
    # Cast to float64 so the arithmetic below is done in double precision.
    trait_scalers = (
        trait_data.sel(parameter = trait)
        .mean(dim = 'pft')
        .wave1_params
        .sel(ens = ens_members)
        .astype('float64')
        .values
    )
    params = row['min_adjusted'] + trait_scalers * (row['max_adjusted'] - row['min_adjusted'])
    trait_dataarray = xr.DataArray(params, dims=['ens'], name=trait)
    universal_params.append(trait_dataarray)

# drop_vars replaces the deprecated Dataset.drop; theta_cj is removed from
# this set of parameters.
universal_params = xr.merge(universal_params).drop_vars('theta_cj')

# Combine vegetation carbon, per-PFT trait values, table-ranged parameters and
# the raw '_Norm' values into one dataset, then remove the 'c3_crop' PFT.
all_traits = (
    xr.merge([Metrics.TOTVEGC, trait_values_scaled, universal_params, traits_norm])
    .drop_sel(pft = 'c3_crop')
)

# For every grid cell, compute the TOTVEGC-weighted mean, standard deviation
# and coefficient of variation of each PFT-level trait across PFTs.
gcds = []
for i in all_traits.gridcell.values:
    ds = all_traits.sel(gridcell = i)

    # PFT weights: ensemble-mean TOTVEGC of each PFT divided by the
    # ensemble-mean of TOTVEGC summed over PFTs.
    weights = ds.TOTVEGC.mean(dim = 'ens') / ds.TOTVEGC.sum(dim = 'pft').mean(dim = 'ens')

    cell_stats = xr.Dataset()
    for t in pft_traits:
        weighted_mean = (weights * ds[t]).sum(dim='pft')
        cell_stats[t + 'Mean'] = weighted_mean

        try:
            weighted_variance = (weights * (ds[t] - weighted_mean) ** 2).sum(dim='pft') / weights.sum(dim='pft')
            weighted_std_dev = np.sqrt(weighted_variance.astype('float64'))
            weighted_cv = weighted_std_dev/weighted_mean
        except Exception:
            # Best effort: a trait whose spread cannot be computed gets NaN.
            # `Exception` (rather than a bare except) keeps Ctrl-C / kernel
            # interrupts working during this long loop.
            weighted_std_dev = np.nan
            weighted_cv = np.nan

        cell_stats[t + 'SD'] = weighted_std_dev
        cell_stats[t + 'CV'] = weighted_cv

    gcds.append(cell_stats)
    if i % 100 == 0:
        print(f'finished with {i}')

# Stack the per-cell results back along the grid-cell dimension.
tds = xr.concat(gcds, dim = 'gridcell')

# Final product: trait values plus grid-cell statistics, without the TOTVEGC
# variable, averaged over time and with the global attributes cleared.
traitsds_final = (
    xr.merge([all_traits, tds])
    .drop_vars('TOTVEGC')
    .mean(dim = 'time')
)
traitsds_final.attrs = {}

finished froot_leaf
finished kmax
finished krmax
finished leaf_long
finished leafcn
finished lmr_intercept_atkin
finished medlynintercept
finished medlynslope
finished psi50
finished slatop
finished stem_leaf
finished theta_cj
finished with 0
finished with 100
finished with 200
finished with 300


In [8]:
# Write the trait dataset to input/Traits.nc.
traits_path = repo_dir+'/input/Traits.nc'
traitsds_final.to_netcdf(traits_path)