In [None]:
import numpy as np
import xarray as xr

from eval_utilities import spatial_temporal_metrics as stm
import matplotlib.pyplot as plt

# Load Configuration

In [None]:
import yaml
# Read the notebook configuration (paths etc.) from config.yaml into CONFIG.
with open("config.yaml") as stream:
    try:
        CONFIG = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Fail here instead of only printing: otherwise CONFIG stays undefined
        # (or stale from an earlier run of this cell) and the problem only
        # surfaces later as an unrelated error in the cells that read CONFIG.
        raise RuntimeError("Could not parse config.yaml") from exc

In [None]:
# Reference data set, restricted to the time window used in this notebook.
eval_period = slice("2021-01-01T00", "2022-11-30T00")
ds_ref = xr.open_zarr(CONFIG["path_ec_euro"]).sel(time=eval_period)

# "clim_cell_area" entry of the climatological fields; passed as `cell_areas`
# to stm.spatial_mean below.
cell_areas = ds_ref.clim_data.sel(clim_variable="clim_cell_area")

# Snow Cover Example

The ILAMB validation framework defines scoring functions based on relative errors (cf. the functions `bias` or `rmse` in "spatial_temporal_metrics.py"). To arrive at a single summarizing metric, a spatial average over the region of interest is performed. This might also include a weighting function. The averaged relative error $\varepsilon$ is then passed to the exponential function, $s = e^{-\alpha \varepsilon}$, which maps non-negative errors to $(0,1]$: a score of 1 corresponds to zero error, and the score decays towards 0 as the error grows.

In [None]:
def score(relative_error, alpha=1):
    """
    Map a relative error to a score via exp(-alpha * relative_error).

    Parameters:
        relative_error: Scalar or array-like relative error.
        alpha: Scaling factor applied to the error before the exponential
            (default 1).

    Returns:
        np.exp(-alpha * relative_error), evaluated element-wise.
    """
    exponent = -alpha * relative_error
    return np.exp(exponent)

Consider the example of snow cover. Obviously, not all grid points experience the same amount of snow. Consequently, it is useful to weight the spatial average using an appropriate measure. Here, we are using the average snow cover over the time series. See the plots below:

In [None]:
var = "snowc"

# Spatial bias results of the evaluated model: bias.zarr and nor_bias.zarr
# (the latter is used as the relative bias further below).
bias_path = "/home/ch23/data_ch23/evalution_results/xgbosst_train_2010_2019_val_2020_2020_est_50_hist/spatial/bias.zarr"
nor_bias_path = "/home/ch23/data_ch23/evalution_results/xgbosst_train_2010_2019_val_2020_2020_est_50_hist/spatial/nor_bias.zarr"
ds_bias = xr.open_zarr(bias_path)
ds_bias_rel = xr.open_zarr(nor_bias_path)

# Both figures are scatter plots over the same grid point coordinates.
lon, lat = ds_bias["lon"], ds_bias["lat"]

# Figure 1: time-averaged snow cover of the reference data.
mean_snowc = ds_ref.data.sel(variable=var).mean(dim="time")

fig, ax = plt.subplots(figsize=(12, 6))
ax.set(title="Average snow cover")
im = ax.scatter(lon, lat, c=mean_snowc, s=10)
fig.colorbar(im)
plt.show()

# Figure 2: bias of the selected variable. The color range is clipped to the
# 1st/99th percentile (NaNs ignored) so that outliers do not dominate the scale.
bias_field = ds_bias.sel(variable=var).data
vmin, vmax = np.nanpercentile(bias_field, [1, 99], axis=0)

fig, ax = plt.subplots(figsize=(12, 6))
ax.set(title=f"Bias {var}")
im = ax.scatter(lon, lat, c=bias_field, s=10, vmin=vmin, vmax=vmax)
fig.colorbar(im)
plt.show()

Using the weighting, we can reduce the impact of areas with little snow. The effect is visible in the scores:

In [None]:
# Score of the relative snow cover bias, using only the cell areas in the spatial mean.
unweighted_mean = stm.spatial_mean(ds_bias_rel, vars=var, cell_areas=cell_areas)
print(f"No weights:\t{score(unweighted_mean).values}")

# Same score, additionally weighted by the time-averaged snow cover.
# A dedicated name is used on purpose: the global `weights` is the per-variable
# dict consumed by the score board cells, and overwriting it here with a
# single-entry dict would break those cells when this one is re-run afterwards.
snowc_weights = ds_ref.data.sel(variable=var).mean(dim="time")
weighted_mean = stm.spatial_mean(ds_bias_rel, vars=var, cell_areas=cell_areas, weights=snowc_weights)
print(f"With weights:\t{score(weighted_mean).values}")

# Score Computation

In [None]:
# We might consider moving this to config.yaml later?
# Model name (row label in the score board) -> directory with its evaluation results.
model_paths = {"xgb_v1": "/home/ch23/data_ch23/evalution_results/xgbosst_train_2010_2019_val_2020_2020_est_50_hist/"}

# Metric name (table title) -> zarr store inside "<model path>/spatial/".
metric_fnames = {"Bias": "nor_bias.zarr",
                 "RMSE": "nor_rmse.zarr",
                 "ACC": "acc.zarr",
                 "Phase Shift": "phase_shift.zarr"}

# Variables to score; one table column each.
variables = ["swvl1", "swvl2", "swvl3", "stl1", "stl2", "stl3", "snowc"]

# Use field capacity to emphasize potentially moist grid points.
field_capacity = ds_ref.clim_data.sel(clim_variable="clim_theta_cap")

# Per-variable weights handed to stm.spatial_mean.
# None is passed through as-is (presumably "no extra weighting" -- confirm
# against stm.spatial_mean).
weights = {"swvl1": field_capacity,
           "swvl2": field_capacity,
           "swvl3": field_capacity,
           "stl1": None,
           "stl2": None,
           "stl3": None,
           # Select "snowc" explicitly instead of relying on the global `var`
           # left over from the plotting cell; otherwise the snow cover weight
           # silently depends on which cells were run before this one.
           "snowc": ds_ref.data.sel(variable="snowc").mean(dim="time")}

In [None]:
def gen_table_header(f, metric, vars):
    """
    Write a simple two-line markdown table header to the file stream `f`.

    The first line holds the metric name followed by one column per variable
    name; the second line is the markdown separator row with centered
    alignment for every variable column.

    Parameters:
        f: Writable text stream (anything with a `write` method).
        metric: Label placed in the first header cell.
        vars: Iterable of variable names, one table column each.
    """
    columns = list(vars)

    # Header row: |<metric>|<var 1>|<var 2>|...|
    header_row = f"|{metric}|" + "".join(f"{column}|" for column in columns)
    # Separator row: one centered column marker per variable.
    separator_row = "|-|" + ":-:|" * len(columns)

    f.write(header_row + "\n")
    f.write(separator_row + "\n")

In [None]:
# Write the score board as markdown: one table per metric, one row per model,
# one column per variable.
with open("scoreboard.md", "w") as f:
    # Title:
    f.write("# AILand Score Board\n\n")

    # Generate a table for every metric separately:
    for metric, metric_fname in metric_fnames.items():
        # Metric sub title followed by the table header:
        f.write(f"## {metric}\n\n")
        gen_table_header(f, metric, variables)

        # One table row per model:
        for model, model_path in model_paths.items():
            ds_metric = xr.open_zarr(f"{model_path}/spatial/{metric_fname}")

            # Weighted spatial mean of the metric, mapped to a score, per variable.
            row_cells = [f"{model}"]
            for var in variables:
                spatial_avg = stm.spatial_mean(ds_metric, vars=var, cell_areas=cell_areas, weights=weights[var])
                var_score = score(spatial_avg).values.item()
                row_cells.append(f"{var_score:.2f}")

            f.write("|" + "|".join(row_cells) + "|\n")

        # Blank line terminates the table.
        f.write("\n")
