This notebook performs some of the evaluation connected to the time domain:
- Power spectra are computed for each variable and compared between a reference and an emulated data set
- Error metrics are conditioned on season (bias or RMSE, selected via `metric`)
- Error metrics are conditioned on time of day, i.e., day (06-18 local time) and night (bias or RMSE, selected via `metric`)

Note that the spectra and amplitude maps are also generated in the automatic workflow. This notebook (as is) will only visualize the results, but not save the plots. You could change this behavior by setting "show=False" in the respective calls to the visualization utilities.

This notebook will produce a lot of figures. Consider restricting the number of analyzed variables.

In [None]:
import numpy as np
import xarray as xr

from eval_utilities import spatial_temporal_metrics as stm
from eval_utilities import visualization as vis
import matplotlib.pyplot as plt

import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Load Configuration

In [2]:
import yaml
# Read the notebook settings (data and output paths) from config.yaml in the
# current working directory.
with open("config.yaml", encoding="utf-8") as stream:
    try:
        CONFIG = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Every later cell needs CONFIG. Merely printing the error would leave
        # it undefined (or stale from an earlier run of this cell), so stop
        # here and keep the parser error attached as the cause.
        raise RuntimeError("Could not parse config.yaml") from exc

Choose model to analyze:

In [32]:
# Key of the emulator to evaluate; it selects the entry in CONFIG["inf_paths"].
model = "xgb_v3"

# Reference and emulated data are restricted to the same evaluation period.
eval_period = slice("2021-01-01T00", "2022-11-30T00")

ds_ref = xr.open_zarr(CONFIG["path_ec_euro"]).sel(time=eval_period)
ds_mod = xr.open_zarr(CONFIG["inf_paths"][model]).sel(time=eval_period)

# Harmonic Analysis

## Power Spectra

Compute the power spectra of all variables contained in both data sets.

In [None]:
# Output directory handed to the plotting utility for the spectra:
path_png = CONFIG["eval_paths"][model] + "/visualization/spectrum"

# Only variables present in both the reference and the emulated data set can
# be compared, so restrict the loop to their intersection.
common_vars = np.intersect1d(ds_ref.variable, ds_mod.variable)
for var_name in common_vars:
    vis.power_spectrum(ds_mod, ds_ref, var_name, path_png, show=True)

## Spatial Maps

Visually compare the spatial amplitude variations at a specific frequency.

In [None]:
# Path for saving the plots:
path_png = CONFIG["eval_paths"][model] + "/visualization/harmonic_analysis"

# Frequencies (in Hz) at which the amplitude maps are drawn, keyed by the
# label that is passed on to the plotting utility.
SECONDS_PER_DAY = 24 * 60 * 60
target_freqs = {
    "Diurnal": 1 / SECONDS_PER_DAY,
    "Monthly": 1 / (30 * SECONDS_PER_DAY),
    "Seasonal": 4 / (365 * SECONDS_PER_DAY),
    "Annual": 1 / (365 * SECONDS_PER_DAY),
}

# The frequency grid depends only on the time axis, so build it once.
# Dividing by a one-second timedelta gives the sampling interval in seconds
# regardless of the datetime unit used for the time coordinate.
dt_seconds = float((ds_ref.time[1] - ds_ref.time[0]).values / np.timedelta64(1, "s"))
freq = np.fft.rfftfreq(ds_ref.sizes["time"], d=dt_seconds)

# Index of the FFT bin closest to each target frequency.
# NOTE(review): the same indices are used for ds_mod, which assumes both data
# sets share the time coordinate of ds_ref - confirm.
freq_index = {label: int(np.argmin(np.abs(freq - f))) for label, f in target_freqs.items()}

# Loop through all variables contained in both data sets:
common_vars = np.intersect1d(ds_ref.variable, ds_mod.variable)
for var in common_vars:
    ref_da = ds_ref.data.sel(variable=var)
    mod_da = ds_mod.data.sel(variable=var)

    # Look the time axis up by name instead of matching on the axis length,
    # which picks the wrong axis if another dimension has the same size.
    ref_axis = ref_da.get_axis_num("time")
    mod_axis = mod_da.get_axis_num("time")

    fft_ref = np.fft.rfft(ref_da, axis=ref_axis)
    fft_mod = np.fft.rfft(mod_da, axis=mod_axis)

    for label, idx in freq_index.items():
        # Select the frequency bin along the axis the FFT was taken over;
        # plain fft[idx] is only correct when time happens to be axis 0.
        vis.plot_amplitude_map(
            np.abs(np.take(fft_ref, idx, axis=ref_axis)),
            np.abs(np.take(fft_mod, idx, axis=mod_axis)),
            path_png,
            label,
            ds_ref,
            var,
            show=True,
        )
    

# Condition on Season

We included two ways to visualize this. The boxplot version suffers from a great number of outliers, so histograms made more sense here: day and night give only two categories, although histograms are less ideal for the four seasons. For a dataset with a temporal resolution finer than 6 h, it might be worthwhile to have more categories; in that case boxplots might be preferable.

In [26]:
# Error metric evaluated for each season: "bias" or "rmse".
metric = "bias" # bias or rmse
# Plot type for the comparison: "hist" (overlaid histograms) or "box" (boxplots).
style = "hist" # hist or box

In [27]:
# Calendar months that make up each meteorological season.
season_months = {
    "DJF": [12, 1, 2],
    "MAM": [3, 4, 5],
    "JJA": [6, 7, 8],
    "SON": [9, 10, 11],
}

# One boolean mask over the reference time axis per season.
month_of_step = ds_ref["time"].dt.month
season_masks = {
    season: month_of_step.isin(months) for season, months in season_months.items()
}

In [None]:
# Compare the chosen error metric between seasons, one figure per variable.
# Reads `metric`, `style` and `season_masks` from the cells above.
path_png = CONFIG["eval_paths"][model] + "/visualization/"
common_vars = np.intersect1d(ds_mod.variable, ds_ref.variable)
season_names = list(season_masks)

# Validate the settings once up front so that no empty figure is left behind.
if metric not in ("bias", "rmse"):
    print("Metric not available.")
elif style not in ("box", "hist"):
    print("Visualization style not available.")
else:
    metric_fn = getattr(stm, metric)  # stm.bias or stm.rmse

    for var in common_vars:
        # One row per season; each row is expected to hold one value per
        # point along "x".
        seasonal_results = np.full([len(season_names), ds_ref.sizes["x"]], np.nan)
        for i, sm in enumerate(season_masks.values()):
            seasonal_results[i] = metric_fn(ds_mod.isel(time=sm), ds_ref.isel(time=sm), vars=var)

        fig, ax = plt.subplots(figsize=(8, 6))

        if style == "box":
            ax.boxplot(seasonal_results.T, labels=season_names)
            # Clip the axis to the central 99.9 % so outliers do not dominate.
            ax.set_ylim(np.nanpercentile(seasonal_results, 0.05), np.nanpercentile(seasonal_results, 99.95))
        else:
            # Derive the bin edges from all seasons together; edges taken from
            # DJF alone would silently drop values of the other seasons that
            # fall outside the DJF range. Non-finite values are excluded
            # because they break the automatic range detection.
            finite = seasonal_results[np.isfinite(seasonal_results)]
            bins = np.histogram_bin_edges(finite, bins=20)
            for name, values in zip(season_names, seasonal_results):
                ax.hist(values[np.isfinite(values)], bins=bins, label=name, alpha=0.5, density=True)
            # Only the histograms carry labels, so only they get a legend.
            ax.legend()

        ax.set(title=f"{var} {metric} in different seasons")

        # Uncomment to save the figure in addition to showing it:
        # fig.savefig(f"{path_png}/{metric}_season_{var}.png", bbox_inches="tight")
        plt.show()

# Condition on Time

In [35]:
# Error metric evaluated for day and night: "bias" or "rmse".
metric = "rmse" # bias or rmse
# Plot type for the comparison: "hist" (overlaid histograms) or "box" (boxplots).
style = "hist" # hist or box

Work out the local times to assign night and day correctly:

In [36]:
# Hour of every time step, repeated for each point along "x".
# NOTE(review): assumes the time coordinate is in UTC - confirm.
standard_time = ds_ref["time"].dt.hour.expand_dims(dim={"x": ds_ref["x"]})

# The sun takes 4 min to traverse 1° of longitude. Locations east of the
# prime meridian are ahead of UTC, so the offset is added.
# NOTE(review): assumes "lon" is in degrees east (negative or > 180 for
# western longitudes) - confirm against the data set.
local_time = standard_time + (4 * ds_ref["lon"]) / 60.0
local_time = local_time.T % 24  # wrap negative values and values >= 24 into [0, 24)

# Day is 06-18 local time; everything else counts as night.
day_mask = (local_time >= 6) & (local_time < 18)

In [37]:
# Compare the chosen error metric between day and night, one figure per
# variable. Reads `metric`, `style` and `day_mask` from the cells above.
path_png = CONFIG["eval_paths"][model] + "/visualization/"
common_vars = np.intersect1d(ds_mod.variable, ds_ref.variable)
period_names = ["day", "night"]

# Validate the settings once up front so that no empty figure is left behind.
if metric not in ("bias", "rmse"):
    print("Metric not available.")
elif style not in ("box", "hist"):
    print("Visualization style not available.")
else:
    metric_fn = getattr(stm, metric)  # stm.bias or stm.rmse

    # The masked data sets do not depend on the variable, so build them once.
    masked_pairs = [
        (ds_mod.where(mask), ds_ref.where(mask)) for mask in (day_mask, ~day_mask)
    ]

    for var in common_vars:
        # Row 0 holds the day values, row 1 the night values; each row is
        # expected to hold one value per point along "x".
        diurnal_results = np.full([len(period_names), ds_ref.sizes["x"]], np.nan)
        for i, (mod_part, ref_part) in enumerate(masked_pairs):
            diurnal_results[i] = metric_fn(mod_part, ref_part, vars=var)

        fig, ax = plt.subplots(figsize=(8, 6))

        if style == "box":
            ax.boxplot(diurnal_results.T, labels=period_names)
            # Clip the axis to the central 99.9 % so outliers do not dominate.
            ax.set_ylim(np.nanpercentile(diurnal_results, 0.05), np.nanpercentile(diurnal_results, 99.95))
        else:
            # Derive the bin edges from day and night together; edges taken
            # from the day values alone would silently drop night values
            # outside that range. Non-finite values are excluded because they
            # break the automatic range detection.
            finite = diurnal_results[np.isfinite(diurnal_results)]
            bins = np.histogram_bin_edges(finite, bins=20)
            for name, values in zip(period_names, diurnal_results):
                ax.hist(values[np.isfinite(values)], bins=bins, label=name, alpha=0.5, density=True)
            # Only the histograms carry labels, so only they get a legend.
            ax.legend()

        ax.set(title=f"{var} {metric} in different times of day")

        # Uncomment to save the figure in addition to showing it:
        # fig.savefig(f"{path_png}/{metric}_diurnal_{var}.png", bbox_inches="tight")
        plt.show()