In [None]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
from dask.diagnostics import ProgressBar
from datatree import DataTree

import valenspy as vp

# User options
# Variables to evaluate; used below to filter the ALARO catalog and to load CLIMATE_GRID.
variables = ["tas", "pr"]
# [first year, last year] of the evaluation period; used for the time selection in STEP 2.
period = [1980, 2019]
# Path to a CLIMATE_GRID file.
# NOTE(review): not referenced by the code visible in this notebook - the regridding
# in STEP 2 takes its target grid from the CLIMATE_GRID dataset loaded via the manager.
target_grid = "/dodrio/scratch/projects/2022_200/external/climate_grid/TEMP_AVG_CLIMATE_GRID_1954_2023_daily.nc"

############################################
# STEP 1: Loading the data

# Start the input manager; it is used further down to load the CCLM and CLIMATE_GRID data.
manager = vp.InputManager(machine="hortense")


# MODEL data
# Load the ALARO data

# The ALARO files are listed in a CSV catalog. Keep only the rows with daily
# frequency whose variable is one of the requested `variables`.
alaro_catalog = "/dodrio/scratch/projects/2022_200/project_output/RMIB-UGent/vsc46032_kobe/ValEnsPy/CORDEX_eval_scripts/catalog.csv"
df_alaro = pd.read_csv(alaro_catalog)
df_alaro = df_alaro[(df_alaro["frequency"] == "day") & df_alaro["variable_id"].isin(variables)]

# Fail early with a clear message instead of an opaque error from open_mfdataset
# when the filter leaves nothing to open.
if df_alaro.empty:
    raise ValueError(
        f"No daily ALARO files found in {alaro_catalog} for variables {variables}"
    )

# Open all the paths left in the catalog as one xarray dataset (dask-backed, chunks="auto").
ds_alaro = xr.open_mfdataset(df_alaro["path"].tolist(), decode_coords="all", chunks="auto")

# Load the COSMO data
## Issue - the daily files of each variable carry a different statistic in their path
## ("mean" for tas, "sum" for pr), and that statistic has to be passed as a path
## identifier for the input manager to find the correct files. The pairing is kept
## in one mapping so it only needs to be extended in a single place.
experiment = "CB2_CCLM_EUR11_ERA5_evaluation"
cclm_daily_statistic = {"tas": "mean", "pr": "sum"}

# Follow the `variables` user option (like the ALARO and CLIMATE_GRID loading does)
# instead of always loading tas and pr.
cclm_variables = [var for var in variables if var in cclm_daily_statistic]
cclm_skipped = [var for var in variables if var not in cclm_daily_statistic]
if cclm_skipped:
    warnings.warn(
        f"No daily statistic configured for CCLM variable(s) {cclm_skipped}; "
        "they are not loaded for CCLM."
    )

# One load per variable (each needs its own path identifiers), merged into one dataset.
ds_cclm = xr.merge(
    [
        manager.load_data(
            "CCLM",
            [var],
            freq="daily",
            path_identifiers=[experiment, cclm_daily_statistic[var]],
        )
        for var in cclm_variables
    ]
)


# Load the MAR data
# (placeholder - no MAR data is loaded in this cell)

# OBSERVATIONAL data

# Load CLIMATE_GRID
## Issue - the ungridded CLIMATE_GRID data currently cannot be loaded because there is
## no unique identifier for it, so the "regridded" path identifier is passed instead.
ds_ref = manager.load_data(
    "CLIMATE_GRID",
    variables,
    path_identifiers=["regridded"],
)

# Create a DataTree object.
# The keys are the node paths in the tree: both models sit under "RCM/ERA5",
# the reference data under "obs".
data_dict = dict(
    [
        ("RCM/ERA5/CCLM6-0-1-URB-ESG", ds_cclm),
        ("RCM/ERA5/ALARO1_SFX", ds_alaro),
        ("obs/CLIMATE_GRID", ds_ref),
    ]
)

dt = DataTree.from_dict(data_dict)

############################################
# STEP 2: Preprocessing the data

## Regrid (currently to CLIMATE_GRID)
# vp.remap_xesmf is applied to every dataset under the "RCM" node, with the
# CLIMATE_GRID dataset as target; keep_attrs=True is handed on via regridding_kwargs.
dt["RCM"] = dt["RCM"].map_over_subtree(
    vp.remap_xesmf,
    dt["obs/CLIMATE_GRID"].to_dataset(),
    method="conservative",
    regridding_kwargs={"keep_attrs": True},
)

## Select the time period set in the user options: from 1 January of period[0]
## up to and including 31 December of period[1].
# NOTE(review): subsetting in time before regridding might reduce the work if
# remap_xesmf is not lazy - confirm before reordering.
dt = dt.sel(time=slice(f"{period[0]}-01-01", f"{period[1]}-12-31"))






File paths found:
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/daily/T_2M_daily_mean_1983.nc
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/daily/T_2M_daily_mean_1987.nc
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/daily/T_2M_daily_mean_1982.nc
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/daily/T_2M_daily_mean_1995.nc
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/daily/T_2M_daily_mean_1984.nc
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/daily/T_2M_daily_mean_1993.nc
/dodrio/scratch/projects/2022_200/project_output/rcs/CORDEXBE2/postprocessing/CB2_CCLM_EUR11_ERA5_evaluation/T_2M/dail

In [None]:
############################################
# STEP 3: Diagnostics

# Compute the whole tree a single time here, rather than once per diagnostic;
# the dask progress bar reports on the computation while it runs.
progress = ProgressBar()
with progress:
    dt = dt.compute()


In [None]:

## Plot maps

# Averaging periods for the maps; only "annual" is listed, JJA and DJF are noted as options.
# NOTE(review): not referenced by the code in this cell.
averaging_period = ["annual"] # JJA, DJF


# Mean maps
# (placeholder - nothing implemented yet)


# Model2Ref
## Spatial Bias
from valenspy.diagnostic import SpatialBias

# Spatial bias of ALARO with CLIMATE_GRID as the reference.
with ProgressBar():
    ds_spbias = SpatialBias(dt["RCM/ERA5/ALARO1_SFX"].to_dataset(), dt["obs/CLIMATE_GRID"].to_dataset())
    ds_spbias = ds_spbias.compute()

# One panel per variable: tas on the left, pr on the right.
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
SpatialBias.plot(ds_spbias.tas, ax=ax[0])
SpatialBias.plot(ds_spbias.pr, ax=ax[1])

# The output directory is relative to the working directory and may not exist yet;
# create it so that saving does not fail with FileNotFoundError.
plot_path = Path("CORDEX_eval_scripts/plots/Spatial_bias.png")
plot_path.parent.mkdir(parents=True, exist_ok=True)
# Save through the figure object so that exactly the figure built above is written.
fig.savefig(plot_path)

In [None]:
# Spatial bias of ALARO with CLIMATE_GRID as the reference.
# NOTE(review): this repeats the computation from the preceding plotting cell.
from valenspy.diagnostic import SpatialBias

with ProgressBar():
    ds_spbias = SpatialBias(
        dt["RCM/ERA5/ALARO1_SFX"].to_dataset(),
        dt["obs/CLIMATE_GRID"].to_dataset(),
    ).compute()
