# 2.1.3: Product benchmarking with sPlot

These trait maps are certainly not the first to be made, and there are several other great attempts at the challenging task of producing global trait maps. This raises an important question for both trait map creators and consumers: which maps should be used and for which purposes?

Here we propose the use of the global vegetation plot dataset sPlot as a benchmark against which existing trait products can be compared, as sPlot is the only global-scale dataset that contains plot-level trait estimates, generally avoiding the biases and pitfalls that come with crowd-sourced species observations.

The trait maps we will be comparing against sPlot are:
- Boonman et al., 2020
- Butler et al., 2017
- Dong et al., 2023
- Schiller et all, 2021
- Madani et al., 2018
- Moreno et al. 2018
- Vallicrosa et al., 2022
- van Bodegom et al., 2014
- Wolf et al., 2022

## Imports and config

In [1]:
import pandas as pd

from src.conf.conf import get_config
from src.conf.environment import log

cfg = get_config()

Get correlations with sPlot for each product.

In [6]:
from pathlib import Path
from numpy import r_
import xarray as xr

from src.utils.dataset_utils import get_trait_map_fns
from src.utils.raster_utils import open_raster
from src.utils.spatial_utils import lat_weights, weighted_pearson_r


def raster_correlation(fn_left: Path, fn_right: Path, resolution: int | float) -> tuple[str, float]:
    """Calculate the weighted Pearson correlation coefficient between a pair of trait maps."""
    log.info("Loading and filtering data for %s...", fn_right.stem)
    r_left = open_raster(fn_left).sel(band=1)
    r_right = open_raster(fn_right).sel(band=1)

    # Ensure the rasters are aligned
    r_right = r_right.rio.reproject_match(r_left)

    df_left = (
        r_left
        .to_dataframe(name=f"left_{fn_left.stem}")
        .drop(columns=["band", "spatial_ref"])
        .dropna()
    )
    df_right = (
        r_right
        .to_dataframe(name=f"right_{fn_right.stem}")
        .drop(columns=["band", "spatial_ref"])
        .dropna()
    )

    log.info("Joining dataframes (%s)...", fn_right.stem)
    df = df_left.join(df_right, how="inner")

    lat_unique = df.index.get_level_values("y").unique()

    log.info("Calculating weights (%s)...", fn_right.stem)
    weights = lat_weights(lat_unique, resolution)

    log.info("Calculating weighted Pearson correlation coefficient (%s)...", fn_right.stem)
    r = weighted_pearson_r(df, weights)

    log.info("Weighted Pearson correlation coefficient: %s", r)

    return fn_right.stem, r

In [11]:
from src.utils.dataset_utils import get_trait_maps_dir


def all_products_paths() -> list[Path]:
    """Get the paths to all products."""
    products_dir = Path("data/interim/other_trait_maps")
    data = []
    for subdir in products_dir.iterdir():
        if subdir.is_dir():
            for file in subdir.glob("**/*"):
                if file.is_file():
                    data.append(file)
    return data

def gather_results() -> pd.DataFrame:
    """Gather the results of the raster correlation analysis into a DataFrame."""
    splot_corr_path = Path("results/product_comparison.parquet")
    if splot_corr_path.exists():
        log.info("Loading existing results...")
        splot_corr = pd.read_parquet(splot_corr_path)
    else:
        splot_corr = pd.DataFrame(columns=["trait_id", "author", "r", "resolution"])

    for fn in all_products_paths():
        res = fn.parent.stem
        if res != cfg.model_res:
            continue
        trait_id, author = fn.stem.split("_")
        splot_path = get_trait_maps_dir("splot") / f"{trait_id}.tif"
        _, r = raster_correlation(splot_path, fn, cfg.target_resolution)

        row = {"trait_id": trait_id, "author": author, "r": r, "resolution": res}
        splot_corr = pd.concat([splot_corr, pd.DataFrame([row])])
    
    return splot_corr

In [12]:
gather_results()

[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Loading and filtering data for X11_bodegom...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Joining dataframes (X11_bodegom)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Calculating weights (X11_bodegom)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Calculating weighted Pearson correlation coefficient (X11_bodegom)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Weighted Pearson correlation coefficient: 0.20405069814434493[0m
  splot_corr = pd.concat([splot_corr, pd.DataFrame([row])])
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Loading and filtering data for X14_moreno...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Joining dataframes (X14_moreno)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Calculating weights (X14_moreno)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment -

Unnamed: 0,trait_id,author,r,resolution
0,X11,bodegom,0.204051,2
0,X14,moreno,0.170192,2
0,X14,schiller,0.334315,2
0,X50,butler,0.355791,2
0,X14,vallicrosa,0.259626,2
0,X11,butler,0.268479,2
0,X50,schiller,0.499278,2
0,X14,butler,0.26871,2
0,X50,boonman,0.378582,2
0,X14,boonman,0.134811,2
