# 2.1.3: Product benchmarking with sPlot


These trait maps are certainly not the first to be made, and there are several other great attempts at the challenging task of producing global trait maps. This raises an important question for both trait map creators and consumers: which maps should be used and for which purposes?

Here we propose the use of the global vegetation plot dataset sPlot as a benchmark against which existing trait products can be compared, as sPlot is the only global-scale dataset that contains plot-level trait estimates, generally avoiding the biases and pitfalls that come with crowd-sourced species observations.

The trait maps we will be comparing against sPlot are:

- Boonman et al., 2020
- Butler et al., 2017
- Dong et al., 2023
- Schiller et all, 2021
- Madani et al., 2018
- Moreno et al. 2018
- Vallicrosa et al., 2022
- van Bodegom et al., 2014
- Wolf et al., 2022


## Imports and config


In [1]:
import pandas as pd

from src.conf.conf import get_config
from src.conf.environment import log

cfg = get_config()

Get correlations with sPlot for each product.


In [None]:
from pathlib import Path
import xarray as xr

from src.utils.dataset_utils import get_trait_map_fns
from src.utils.raster_utils import open_raster
from src.utils.spatial_utils import lat_weights, weighted_pearson_r


def raster_correlation(
    fn_left: Path, fn_right: Path, resolution: int | float
) -> tuple[str, float]:
    """Calculate the weighted Pearson correlation coefficient between a pair of trait maps."""
    log.info("Loading and filtering data for %s...", fn_right.stem)
    r_left = open_raster(fn_left).sel(band=1)
    r_right = open_raster(fn_right).sel(band=1)

    # Ensure the rasters are aligned
    r_right = r_right.rio.reproject_match(r_left)

    df_left = (
        r_left.to_dataframe(name=f"left_{fn_left.stem}")
        .drop(columns=["band", "spatial_ref"])
        .dropna()
    )
    df_right = (
        r_right.to_dataframe(name=f"right_{fn_right.stem}")
        .drop(columns=["band", "spatial_ref"])
        .dropna()
    )

    log.info("Joining dataframes (%s)...", fn_right.stem)
    df = df_left.join(df_right, how="inner")

    lat_unique = df.index.get_level_values("y").unique()

    log.info("Calculating weights (%s)...", fn_right.stem)
    weights = lat_weights(lat_unique, resolution)

    log.info(
        "Calculating weighted Pearson correlation coefficient (%s)...", fn_right.stem
    )
    r = weighted_pearson_r(df, weights)

    log.info("Weighted Pearson correlation coefficient: %s", r)

    return fn_right.stem, r

In [None]:
from src.utils.dataset_utils import get_trait_maps_dir


def all_products_paths() -> list[Path]:
    """Get the paths to all products."""
    products_dir = Path("data/interim/other_trait_maps")
    data = []
    for subdir in products_dir.iterdir():
        if subdir.is_dir():
            for file in subdir.glob("**/*"):
                if file.is_file():
                    data.append(file)
    return data


def gather_results() -> pd.DataFrame:
    """Gather the results of the raster correlation analysis into a DataFrame."""
    splot_corr_path = Path("results/product_comparison.parquet")
    if splot_corr_path.exists():
        log.info("Loading existing results...")
        splot_corr = pd.read_parquet(splot_corr_path)
    else:
        splot_corr = pd.DataFrame(columns=["trait_id", "author", "r", "resolution"])

    for fn in all_products_paths():
        res = fn.parent.stem
        if res != cfg.model_res:
            continue
        trait_id, author = fn.stem.split("_")
        splot_path = get_trait_maps_dir("splot") / f"{trait_id}.tif"
        _, r = raster_correlation(splot_path, fn, cfg.target_resolution)

        row = {"trait_id": trait_id, "author": author, "r": r, "resolution": res}
        splot_corr = pd.concat([splot_corr, pd.DataFrame([row])])

    return splot_corr

In [None]:
gather_results()

[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Loading and filtering data for X11_bodegom...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Joining dataframes (X11_bodegom)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Calculating weights (X11_bodegom)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Calculating weighted Pearson correlation coefficient (X11_bodegom)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Weighted Pearson correlation coefficient: 0.20405069814434493[0m
  splot_corr = pd.concat([splot_corr, pd.DataFrame([row])])
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Loading and filtering data for X14_moreno...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Joining dataframes (X14_moreno)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment - INFO - Calculating weights (X14_moreno)...[0m
[94m2024-11-12 12:09:27 UTC - src.conf.environment -

Unnamed: 0,trait_id,author,r,resolution
0,X11,bodegom,0.204051,2
0,X14,moreno,0.170192,2
0,X14,schiller,0.334315,2
0,X50,butler,0.355791,2
0,X14,vallicrosa,0.259626,2
0,X11,butler,0.268479,2
0,X50,schiller,0.499278,2
0,X14,butler,0.26871,2
0,X50,boonman,0.378582,2
0,X14,boonman,0.134811,2


## Compare our maps and others


In [4]:
import numpy as np

pd.set_option("display.max_columns", None)


trait_ids = ["X11_mean", "X14_mean", "X50_mean"]
all_results = (
    pd.read_parquet("results/all_results.parquet")[
        ["trait_id", "resolution", "trait_set", "pearsonr_wt"]
    ]
    .query(
        "trait_set == 'splot_gbif' and trait_id in @trait_ids and resolution != '1km'"
    )
    .rename({"pearsonr_wt": "r"}, axis=1)
    .drop(columns=["trait_set"])
    .assign(author="ours")
    # Replace "_mean" with "" in trait_id
    .assign(trait_id=lambda df: df.trait_id.str.replace("_mean", ""))
    .astype({"trait_id": str, "author": str, "resolution": str, "r": np.float64})
)
dtypes = {"trait_id": str, "author": str, "resolution": str, "r": np.float64}
other_prods = pd.read_csv("results/product_comparison.csv", dtype=dtypes)

In [6]:
merged = pd.concat([all_results, other_prods], ignore_index=True).astype(
    {"resolution": str}
)
merged

Unnamed: 0,trait_id,resolution,r,author
0,X50,001,0.620276,ours
1,X50,001,0.619557,ours
2,X11,001,0.466838,ours
3,X11,001,0.489755,ours
4,X14,001,0.366814,ours
...,...,...,...,...
69,X14,2,0.134811,boonman
70,X50,2,-0.016146,vallicrosa
71,X11,2,0.385794,schiller
72,X11,2,0.344297,boonman


In [7]:
merged.query("author == 'moreno'")

Unnamed: 0,trait_id,resolution,r,author
30,X14,1,0.226362,moreno
31,X14,2,0.259183,moreno
33,X14,5,0.24091,moreno
47,X14,1,0.201013,moreno
61,X14,2,0.170192,moreno


In [24]:
# Set resolution as a top-level header and trait_id as the second-level header, where the column values are the r values and the rows are the authors
pivot = merged.pivot_table(
    index=["author", "resolution"], columns=["trait_id"], values="r"
)
pivot

Unnamed: 0_level_0,trait_id,X11,X14,X50
author,resolution,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bodegom,5,0.23066,,
bodegom,1,0.20848,,
bodegom,2,0.204051,,
boonman,5,0.329728,0.092284,0.366732
boonman,1,0.322155,0.097005,0.359512
boonman,2,0.344297,0.134811,0.378582
butler,5,0.384163,0.349144,0.427301
butler,1,0.342459,0.321303,0.405507
butler,2,0.268479,0.26871,0.355791
madani,5,0.208963,,


In [None]:
print(pivot.to_latex(float_format="%.2f", na_rep="--"))

\begin{tabular}{llrrr}
\toprule
 & trait_id & X11 & X14 & X50 \\
author & resolution &  &  &  \\
\midrule
\multirow[t]{3}{*}{bodegom} & 05 & 0.23 & -- & -- \\
 & 1 & 0.21 & -- & -- \\
 & 2 & 0.20 & -- & -- \\
\cline{1-5}
\multirow[t]{3}{*}{boonman} & 05 & 0.33 & 0.09 & 0.37 \\
 & 1 & 0.32 & 0.10 & 0.36 \\
 & 2 & 0.34 & 0.13 & 0.38 \\
\cline{1-5}
\multirow[t]{3}{*}{butler} & 05 & 0.38 & 0.35 & 0.43 \\
 & 1 & 0.34 & 0.32 & 0.41 \\
 & 2 & 0.27 & 0.27 & 0.36 \\
\cline{1-5}
\multirow[t]{3}{*}{madani} & 05 & 0.21 & -- & -- \\
 & 1 & 0.23 & -- & -- \\
 & 2 & 0.23 & -- & -- \\
\cline{1-5}
\multirow[t]{5}{*}{moreno} & 001 & -- & 0.23 & -- \\
 & 02 & -- & 0.26 & -- \\
 & 05 & -- & 0.24 & -- \\
 & 1 & -- & 0.20 & -- \\
 & 2 & -- & 0.17 & -- \\
\cline{1-5}
\multirow[t]{5}{*}{ours} & 001 & 0.48 & 0.38 & 0.62 \\
 & 02 & 0.63 & 0.61 & 0.67 \\
 & 05 & 0.61 & 0.65 & 0.67 \\
 & 1 & 0.60 & 0.63 & 0.65 \\
 & 2 & 0.61 & 0.60 & 0.65 \\
\cline{1-5}
\multirow[t]{3}{*}{schiller} & 05 & 0.39 & 0.33 & 0.45 \\
 &