This notebook contains some consistency checks. Using the bias as an example, grid points with especially high or especially low bias (outliers) are plotted. One plot shows a map of the domain to see whether the outliers are concentrated in a specific region. In another plot, time series of these particular grid points are plotted to get a feeling for what is going on.

In [None]:
import numpy as np
import xarray as xr

from eval_utilities import spatial_temporal_metrics as stm
from eval_utilities import visualization as vis
import matplotlib.pyplot as plt

# Load Configuration

In [None]:
import yaml
# Read the notebook configuration from config.yaml in the working directory.
with open("config.yaml", encoding="utf-8") as stream:
    try:
        CONFIG = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Report the parse error and re-raise: the following cells all read CONFIG,
        # so continuing would only fail later with a less helpful NameError.
        print(exc)
        raise

In [None]:
# Open the reference dataset and the model inference datasets, each restricted
# to the same time window.
_eval_period = slice("2021-01-01T00", "2022-11-30T00")
_inference_paths = CONFIG["inf_paths"]

ds_ref = xr.open_zarr(CONFIG["path_ec_euro"]).sel(time=_eval_period)
ds_xgb = xr.open_zarr(_inference_paths["xgb_v3"]).sel(time=_eval_period)
ds_mlp = xr.open_zarr(_inference_paths["mlp_v3"]).sel(time=_eval_period)
ds_ump = xr.open_zarr(_inference_paths["unimp_v3"]).sel(time=_eval_period)
ds_mlpobs = xr.open_zarr(_inference_paths["mlp_obs_v2"]).sel(time=_eval_period)

# Bias

In [None]:
# Open the spatial bias store of each model from "<eval path>/spatial/bias.zarr".
# NOTE(review): the MLP and UNIMP keys used here end in "_v1", while the inference
# datasets are opened with "_v3" keys -- confirm that the versions are meant to differ.
_eval_paths = CONFIG["eval_paths"]
_bias_store = "/spatial/bias.zarr"

bias_xgb = xr.open_zarr(_eval_paths["xgb_v3"] + _bias_store)
bias_mlp = xr.open_zarr(_eval_paths["mlp_v1"] + _bias_store)
bias_ump = xr.open_zarr(_eval_paths["unimp_v1"] + _bias_store)
bias_mlpobs = xr.open_zarr(_eval_paths["mlp_obs_v2"] + _bias_store)

In [None]:
# Variable analysed by the cells below; keep exactly one assignment active and
# comment out the others to switch.
#var = "swvl1"
var = "stl1"
#var = "snowc"
#var = "e"

In [None]:
def plot_bias(ds_bias, var, title):
    """Scatter-plot one variable of a spatial metric at the lon/lat of each grid point.

    The colour scale is limited to the 1st-99th percentile of the plotted
    values (NaNs are ignored when computing the percentiles).

    Args:
        ds_bias: xarray object providing ``lon``/``lat`` and a ``data`` entry
            that can be selected along ``variable``.
        var: Name of the variable to select.
        title: Title shown above the plot.
    """
    # Select once and reuse for the colour limits and for the scatter colours.
    values = ds_bias.sel(variable=var).data
    vmin = np.nanpercentile(values, 1, axis=0)
    vmax = np.nanpercentile(values, 99, axis=0)

    fig, ax = plt.subplots(figsize=(12, 6))
    im = ax.scatter(ds_bias["lon"], ds_bias["lat"], c=values, s=10, vmin=vmin, vmax=vmax)
    fig.colorbar(im)
    # Only the caller-supplied title is set; the former default title was always overwritten.
    ax.set(title=title)

    plt.show()

# Show the bias map of one model at a time; uncomment another line to view
# a different model.
#plot_bias(bias_xgb, var, f"{var} bias for xgb")
#plot_bias(bias_mlp, var, f"{var} bias for mlp")
#plot_bias(bias_ump, var, f"{var} bias for unimp")
plot_bias(bias_mlpobs, var, f"{var} bias for mlp_obs_v2")

In [None]:
def plot_comp(ds_bias, varname, data_comp, comp_varname, title, bias_label="Bias in K"):
    """Plot a comparison field and the bias of one variable side by side.

    Args:
        ds_bias: xarray object providing ``lon``/``lat`` and a ``data`` entry
            that can be selected along ``variable``.
        varname: Name of the variable whose bias is shown in the right panel.
        data_comp: Values used to colour the left panel; they are drawn at the
            ``lon``/``lat`` of the notebook-level ``ds_ref``.
        comp_varname: Label of the comparison field (panel title and colorbar).
        title: Title of the whole figure.
        bias_label: Colorbar label of the bias panel. The default matches the
            previously hard-coded label; pass another label for variables that
            are not temperatures.
    """
    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
    fig.suptitle(title)

    # Left panel: the comparison field.
    axs[0].set(title=f"{comp_varname}")
    im = axs[0].scatter(ds_ref["lon"], ds_ref["lat"], c=data_comp, s=10)
    fig.colorbar(im, ax=axs[0], label=comp_varname, orientation="horizontal")

    # Right panel: the bias, colour scale limited to the 1st-99th percentile.
    bias_values = ds_bias.sel(variable=varname).data
    # Use the parameter here, not the notebook-level `var`, so the panel title
    # always names the variable that is actually plotted.
    axs[1].set(title=f"Bias {varname}")
    vmin = np.nanpercentile(bias_values, 1, axis=0)
    vmax = np.nanpercentile(bias_values, 99, axis=0)
    im = axs[1].scatter(ds_bias["lon"], ds_bias["lat"], c=bias_values, s=10, vmin=vmin, vmax=vmax)
    fig.colorbar(im, ax=axs[1], label=bias_label, orientation="horizontal")

    plt.show()

In [None]:
# UNIMP bias next to the "clim_clake" field of the reference dataset.
lake_cover = ds_ref.sel(clim_variable="clim_clake").clim_data
plot_comp(
    bias_ump,
    varname=var,
    data_comp=lake_cover,
    comp_varname="clim_clake",
    title=f"Bias of {var} in UNIMP compared to lake cover",
)

In [None]:
# Keep only grid points whose "clim_clake" value is below 0.5 (all others become
# NaN) and plot the UNIMP bias of the remaining points.
lake_fraction = ds_ref.sel(clim_variable="clim_clake").clim_data
bias_ump_low_lake = bias_ump.where(lake_fraction < 0.5)
plot_bias(
    bias_ump_low_lake,
    var=var,
    title=f"{var} bias in UNIMP without Lake Cover > 50%",
)

In [None]:
# Time series of the selected variable at a single grid point: reference vs. UNIMP.
i = 5847  # coast near the Sea of Azov
ref_point = ds_ref.sel(variable=var).isel(x=i)
lat, lon = ref_point["lat"].values, ref_point["lon"].values

plt.plot(ds_ref["time"], ref_point.data, label="reference")
plt.plot(ds_ump["time"], ds_ump.sel(variable=var).isel(x=i).data, label="unimp")
plt.title(f"GP (i_x={i}) @ ({lat:.2f},{lon:.2f})")
# The x-axis is time; the plotted quantity is the selected variable, so it
# labels the y-axis and follows `var` instead of a hard-coded name.
plt.xlabel("time")
plt.ylabel(var)
plt.legend()
plt.show()

In [None]:
# Time series of the selected variable at a single grid point: reference vs. UNIMP.
i = 5850  # inside the Sea of Azov
sea_point = ds_ref.sel(variable=var).isel(x=i)
lat, lon = sea_point["lat"].values, sea_point["lon"].values

plt.plot(ds_ref["time"], sea_point.data, label="reference")
plt.plot(ds_ump["time"], ds_ump.sel(variable=var).isel(x=i).data, label="unimp")
plt.title(f"GP (i_x={i}) @ ({lat:.2f},{lon:.2f})")
# The x-axis is time; the plotted quantity is the selected variable, so it
# labels the y-axis and follows `var` instead of a hard-coded name.
plt.xlabel("time")
plt.ylabel(var)
plt.legend()
plt.show()

In [None]:
# Compare the MLP bias with the standard deviation over time of "snowc" in the MLP output.
# The field is computed with .std(dim="time"), so the labels say "std" to describe
# what is actually plotted.
# NOTE(review): if the temporal mean was intended, switch .std to .mean and
# restore the "mean" wording in the labels.
plot_comp(bias_mlp,
          varname=var,
          data_comp=ds_mlp.sel(variable="snowc").data.std(dim="time"),
          comp_varname="std of snowc in mlp",
          title=f"Bias of {var} in MLP compared to std of snowc")

In [None]:
# Compare the MLP bias with the "clim_geopot" field of the reference dataset.
# The figure title names the field that is actually shown in the left panel.
plot_comp(bias_mlp,
          varname=var,
          data_comp=ds_ref.sel(clim_variable="clim_geopot").clim_data,
          comp_varname="geopotential",
          title=f"Bias of {var} in MLP compared to geopotential")

In [None]:
# Compare the MLP bias with the "clim_sotype" field of the reference dataset.
# The figure title names the field that is actually shown in the left panel.
plot_comp(bias_mlp,
          varname=var,
          data_comp=ds_ref.sel(clim_variable="clim_sotype").clim_data,
          comp_varname="soiltype",
          title=f"Bias of {var} in MLP compared to soil type")

# RMSE

In [None]:
# Open the spatial RMSE store of each model from "<eval path>/spatial/rmse.zarr".
# The evaluation paths come from CONFIG["eval_paths"], the same mapping the bias
# stores are opened from; no `fnames` mapping is defined in this notebook.
rmse_xgb = xr.open_zarr(CONFIG["eval_paths"]["xgb_v3"] + "/spatial/rmse.zarr")
rmse_mlp = xr.open_zarr(CONFIG["eval_paths"]["mlp_v1"] + "/spatial/rmse.zarr")
rmse_ump = xr.open_zarr(CONFIG["eval_paths"]["unimp_v1"] + "/spatial/rmse.zarr")

In [None]:
# Variable used by the RMSE plots below.
var = "stl1"

In [None]:
def plot_rmse(ds_rmse, var, title):
    """Scatter-plot the RMSE of one variable at the lon/lat of each grid point.

    The colour scale is limited to the 1st-99th percentile of the plotted
    values (NaNs are ignored when computing the percentiles).

    Args:
        ds_rmse: xarray object providing ``lon``/``lat`` and a ``data`` entry
            that can be selected along ``variable``.
        var: Name of the variable to select.
        title: Title shown above the plot.
    """
    # Select once and reuse for the colour limits and for the scatter colours.
    values = ds_rmse.sel(variable=var).data
    vmin = np.nanpercentile(values, 1, axis=0)
    vmax = np.nanpercentile(values, 99, axis=0)

    fig, ax = plt.subplots(figsize=(12, 6))
    im = ax.scatter(ds_rmse["lon"], ds_rmse["lat"], c=values, s=10, vmin=vmin, vmax=vmax)
    fig.colorbar(im)
    # Only the caller-supplied title is set; the former default title was always overwritten.
    ax.set(title=title)

    plt.show()

# RMSE maps for each model, drawn with the RMSE plotting function defined in this cell.
plot_rmse(rmse_xgb, var, f"{var} rmse for xgb")
plot_rmse(rmse_mlp, var, f"{var} rmse for mlp")
plot_rmse(rmse_ump, var, f"{var} rmse for unimp")

# Exceptional Grid Points

In [None]:
# Histogram of the bias of `var` with the lower and upper percentile thresholds marked.
# NOTE(review): `ds_bias` is not assigned at notebook level in the cells above;
# it presumably has to be set to one of the bias_* datasets first -- confirm.
fig, ax = plt.subplots(figsize=(12, 6))

l_p, u_p = 1, 99
bias_values = ds_bias.sel(variable=var).data

lower_perc = np.nanpercentile(bias_values, l_p, axis=0)
upper_perc = np.nanpercentile(bias_values, u_p, axis=0)

# Dashed vertical line plus a rotated percentage label for each threshold.
for pct, threshold in ((l_p, lower_perc), (u_p, upper_perc)):
    ax.axvline(threshold, color="tab:grey", ls="dashed")
    ax.text(
        threshold,
        0.99,
        f"{pct}%",
        color='tab:grey',
        ha='right',
        va='top',
        rotation=90,
        transform=ax.get_xaxis_transform(),
    )

ax.hist(bias_values, bins=100)
plt.show()

In [None]:
# Indices of the grid points whose bias lies below the lower / above the upper
# percentile threshold.
selected_bias = ds_bias.sel(variable=var).data
below_mask = selected_bias < lower_perc
above_mask = selected_bias > upper_perc
ii_below = np.where(below_mask)[0]
ii_above = np.where(above_mask)[0]

In [None]:
# Model-minus-reference anomalies at the grid points in ii_below, smoothed with
# a rolling mean over 7 time steps.
# NOTE(review): `ds_mod` is not assigned in the cells above; it presumably has to
# be set to one of the model datasets first -- confirm.
model_low = ds_mod.sel(variable=var).data.isel(x=ii_below)
reference_low = ds_ref.sel(variable=var).data.isel(x=ii_below)
anomalies = model_low - reference_low
smoothed_low = anomalies.rolling(time=7).mean()

fig, ax = plt.subplots(figsize=(12, 6))
ax.set(title=f"Smoothed anomalies of {var} with high negative bias")
ax.plot(smoothed_low["time"], smoothed_low)

plt.show()

In [None]:
# Model-minus-reference anomalies at the grid points in ii_above, smoothed with
# a rolling mean over 7 time steps.
# NOTE(review): `ds_mod` is not assigned in the cells above; it presumably has to
# be set to one of the model datasets first -- confirm.
model_high = ds_mod.sel(variable=var).data.isel(x=ii_above)
reference_high = ds_ref.sel(variable=var).data.isel(x=ii_above)
anomalies = model_high - reference_high
smoothed_high = anomalies.rolling(time=7).mean()

fig, ax = plt.subplots(figsize=(12, 6))
ax.set(title=f"Smoothed anomalies of {var} with high positive bias")
ax.plot(smoothed_high["time"], smoothed_high)

plt.show()

In [None]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [None]:
# Mark the grid points listed in ii_above with red crosses on a map of the domain.
map_proj = ccrs.PlateCarree()
fig, ax = plt.subplots(figsize=(10,4), subplot_kw={'projection': map_proj})

# Limit the map to the lon/lat range covered by the reference dataset.
lon_all, lat_all = ds_ref["lon"], ds_ref["lat"]
ax.set_extent([lon_all.min(), lon_all.max(), lat_all.min(), lat_all.max()], crs=map_proj)

for map_feature in (cfeature.LAND, cfeature.OCEAN, cfeature.COASTLINE):
    ax.add_feature(map_feature)
ax.add_feature(cfeature.BORDERS, linestyle=':')

# The markers use one fixed colour; colouring them by their bias value is not enabled.
outlier_points = ds_ref.clim_data.isel(x=ii_above)
ax.scatter(outlier_points["lon"], outlier_points["lat"], color="red", marker="x", s=80)

plt.show()

In [None]:
# Index of the largest (most positive) bias value of `var`.
# nanargmax ignores NaN entries; plain argmax would return the position of the
# first NaN instead of the real maximum.
i_worst = np.nanargmax(ds_bias.data.sel(variable=var).values)

## Defective grid point in ECLand?

In [None]:
# Index along x of the grid point inspected in the next cell.
i = 2982

In [None]:
# Plot the reference time series at grid point i for every variable that is
# present in both the model dataset and the reference dataset.
# A dedicated loop name is used so the notebook-wide `var` selection is not
# overwritten, and the series is drawn against time like the earlier
# single-grid-point plots.
for common_var in np.intersect1d(ds_mod.variable, ds_ref.variable):
    plt.plot(ds_ref["time"], ds_ref.data.sel(variable=common_var).isel(x=i))
    plt.title(f"Variable {common_var}")
    plt.show()