In [9]:
# Show the resolved location of the model catalog inside the repository
# (git_dir is defined in the setup cell below, which must have been run first)
git_dir.joinpath("CORDEX_eval_scripts/catalog.csv")

PosixPath('/dodrio/scratch/projects/2022_200/project_output/RMIB-UGent/vsc31332_inne/ValEnsPy/CORDEX_eval_scripts/catalog.csv')

In [None]:
import os
import subprocess
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
from dask.diagnostics import ProgressBar
from datatree import DataTree

import valenspy as vp

# Locate the top-level directory of the git repository this notebook runs in.
# check_output raises CalledProcessError when the working directory is not inside
# a git repository (and FileNotFoundError when git is not installed), instead of
# silently yielding an empty string that Path() would turn into ".".
git_dir = Path(
    subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True).strip()
)


# --- User options ---
# Names of the variables to evaluate
variables = ["tas", "pr"]
# First and last year of the evaluation period (used to build the time slice in STEP 2)
period = [1980, 2019]
# Path to the reference grid file
# NOTE(review): not referenced again in the visible cells — confirm it is still needed
target_grid = "/dodrio/scratch/projects/2022_200/external/climate_grid/TEMP_AVG_CLIMATE_GRID_1954_2023_daily.nc"

############################################
# STEP 1: Loading the data

# Input manager for the "hortense" machine; used further down to load CCLM and CLIMATE_GRID
manager = vp.InputManager(machine="hortense")


# MODEL data
# ALARO: keep only the catalog rows with daily frequency and a requested variable
df_alaro = pd.read_csv(git_dir / "CORDEX_eval_scripts/catalog.csv").loc[
    lambda cat: (cat["frequency"] == "day") & cat["variable_id"].isin(variables)
]

# Open all selected files together as one dask-chunked xarray dataset
alaro_paths = df_alaro["path"].values
ds_alaro = xr.open_mfdataset(alaro_paths, decode_coords="all", chunks="auto")

# Load the COSMO (CCLM) data
## Issue - the daily statistic is hardcoded per variable ("mean" for tas, "sum" for pr),
## because the input manager needs it as a path identifier to find the correct files
experiment = "CB2_CCLM_EUR11_ERA5_evaluation"
cclm_daily_statistic = {"tas": "mean", "pr": "sum"}
ds_cclm = xr.merge(
    [
        manager.load_data("CCLM", [var_name], freq="daily", path_identifiers=[experiment, statistic])
        for var_name, statistic in cclm_daily_statistic.items()
    ]
)


# MAR data
## Placeholder: no MAR data is loaded yet, so the ALARO dataset is reused
## under the MAR name (for plotting purposes only)
ds_mar = ds_alaro

# OBSERVATIONAL data

# CLIMATE_GRID reference
## Issue - the ungridded CLIMATE_GRID data cannot be loaded yet because there is
## no unique identifier for it, so the files matching "regridded" are selected

ds_ref = manager.load_data("CLIMATE_GRID", variables, path_identifiers=["regridded"])

# Collect all datasets in one DataTree; every key is the path of a node in the tree
data_dict = dict(
    [
        ("RCM/ERA5/ALARO1_SFX", ds_alaro),
        ("RCM/ERA5/CCLM6-0-1-URB-ESG", ds_cclm),
        ("RCM/ERA5/MAR", ds_mar),
        ("obs/CLIMATE_GRID", ds_ref),
    ]
)

dt = DataTree.from_dict(data_dict)

############################################
# STEP 2: Preprocessing the data

## Regrid every dataset under "RCM" onto the CLIMATE_GRID grid (bilinear)
reference_grid = dt["obs/CLIMATE_GRID"].to_dataset()
dt["RCM"] = dt["RCM"].map_over_subtree(
    vp.remap_xesmf,
    reference_grid,
    method="bilinear",
    regridding_kwargs={"keep_attrs": True},
)

## Keep only the evaluation period: 1 January of period[0] up to and
## including 31 December of period[1]
first_day = f"{period[0]}-01-01"
last_day = f"{period[1]}-12-31"
dt = dt.sel(time=slice(first_day, last_day))


############################################
# STEP 3: Diagnostics

In [None]:
############################################
# STEP 3: Diagnostics

# Compute the data once here, rather than separately for every diagnostic below.
# The dask ProgressBar context reports progress while the computation runs.
with ProgressBar():
    # Rebinds `dt` to the result of its own .compute() call
    dt = dt.compute()


In [3]:
# Same regridding call as in the preprocessing step, but with method="conservative".
# NOTE(review): the recorded output of this cell is `KeyError: 'lon_b'`. Conservative
# regridding in xESMF needs cell-corner coordinates (lon_b / lat_b) — presumably at
# least one of the grids involved does not provide them; confirm and add the bounds
# before relying on this cell.
dt["RCM"] = dt["RCM"].map_over_subtree(vp.remap_xesmf, dt.obs.CLIMATE_GRID.to_dataset(), method="conservative", regridding_kwargs={"keep_attrs": True})

## Select the time period given by `period`: from 1 January of period[0]
## up to and including 31 December of period[1]
dt = dt.sel(time=slice(f"{period[0]}-01-01", f"{period[1]}-12-31"))

KeyError: 'lon_b'

In [12]:

# Display the time-averaged tree. dt_timmean is created in the "Plot maps" cell
# further down, so that cell must have been run before this one.
dt_timmean

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.78 MiB 1.78 MiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 111 graph layers Data type float64 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.78 MiB 1.78 MiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 111 graph layers Data type float64 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 911.29 kiB 911.29 kiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 52 graph layers Data type float32 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 911.29 kiB 911.29 kiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 52 graph layers Data type float32 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 221 graph layers,1 chunks in 221 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 756.11 kiB Shape (434, 446) (434, 446) Dask graph 1 chunks in 221 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 221 graph layers,1 chunks in 221 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 221 graph layers,1 chunks in 221 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 756.11 kiB Shape (434, 446) (434, 446) Dask graph 1 chunks in 221 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 221 graph layers,1 chunks in 221 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,458.15 kiB
Shape,"(434, 446)","(338, 347)"
Dask graph,4 chunks in 96 graph layers,4 chunks in 96 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 458.15 kiB Shape (434, 446) (338, 347) Dask graph 4 chunks in 96 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,458.15 kiB
Shape,"(434, 446)","(338, 347)"
Dask graph,4 chunks in 96 graph layers,4 chunks in 96 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.48 MiB,916.30 kiB
Shape,"(434, 446)","(338, 347)"
Dask graph,4 chunks in 97 graph layers,4 chunks in 97 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.48 MiB 916.30 kiB Shape (434, 446) (338, 347) Dask graph 4 chunks in 97 graph layers Data type float64 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,1.48 MiB,916.30 kiB
Shape,"(434, 446)","(338, 347)"
Dask graph,4 chunks in 97 graph layers,4 chunks in 97 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.78 MiB 1.78 MiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 111 graph layers Data type float64 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.78 MiB 1.78 MiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 111 graph layers Data type float64 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,1.78 MiB,1.78 MiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 111 graph layers,1 chunks in 111 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 911.29 kiB 911.29 kiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 52 graph layers Data type float32 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 911.29 kiB 911.29 kiB Shape (483, 483) (483, 483) Dask graph 1 chunks in 52 graph layers Data type float32 numpy.ndarray",483  483,

Unnamed: 0,Array,Chunk
Bytes,911.29 kiB,911.29 kiB
Shape,"(483, 483)","(483, 483)"
Dask graph,1 chunks in 52 graph layers,1 chunks in 52 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.02 kiB,9.56 kiB
Shape,"(70, 75)","(34, 36)"
Dask graph,9 chunks in 5 graph layers,9 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 41.02 kiB 9.56 kiB Shape (70, 75) (34, 36) Dask graph 9 chunks in 5 graph layers Data type float64 numpy.ndarray",75  70,

Unnamed: 0,Array,Chunk
Bytes,41.02 kiB,9.56 kiB
Shape,"(70, 75)","(34, 36)"
Dask graph,9 chunks in 5 graph layers,9 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.02 kiB,9.83 kiB
Shape,"(70, 75)","(34, 37)"
Dask graph,9 chunks in 16 graph layers,9 chunks in 16 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 41.02 kiB 9.83 kiB Shape (70, 75) (34, 37) Dask graph 9 chunks in 16 graph layers Data type float64 numpy.ndarray",75  70,

Unnamed: 0,Array,Chunk
Bytes,41.02 kiB,9.83 kiB
Shape,"(70, 75)","(34, 37)"
Dask graph,9 chunks in 16 graph layers,9 chunks in 16 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [None]:

## Plot maps

# Plot settings
# Variable shown in the maps
variable = "tas"
# Averaging period(s); other candidates noted by the author: JJA, DJF
# NOTE(review): not used yet in the visible cells
averaging_period = ["annual"]

# calculate time mean for every dataset in tree
def mean_over_time(ds):
    """Return ``ds`` averaged over its ``time`` dimension.

    Parameters
    ----------
    ds
        Object providing a ``mean(dim=...)`` method; in this notebook it is
        applied to the dataset of every node in the tree.

    Returns
    -------
    The result of ``ds.mean(dim="time")``.
    """
    time_mean = ds.mean(dim="time")
    return time_mean

# Time mean of every dataset in the tree
dt_timmean = dt.map_over_subtree(mean_over_time)


# create figure: one panel per dataset, two panels per row, so the grid grows
# with data_dict instead of failing once it holds more than four entries
dataset_names = list(data_dict)
n_cols = 2
n_rows = max(1, -(-len(dataset_names) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, dataset_name in zip(axes, dataset_names):
    dt_timmean[dataset_name][variable].plot(ax=ax, cmap='coolwarm')
    # Title each panel with its tree path so the maps can be told apart
    ax.set_title(dataset_name)

# Hide a trailing panel that has no dataset (odd number of datasets)
for ax in axes[len(dataset_names):]:
    ax.set_visible(False)


# Mean maps





[##################                      ] | 46% Completed | 130.26 s

In [None]:
#Model2Ref
## Spatial bias of the ALARO dataset with respect to the CLIMATE_GRID reference
from valenspy.diagnostic import SpatialBias

with ProgressBar():
    ds_spbias = SpatialBias(dt["RCM/ERA5/ALARO1_SFX"].to_dataset(), dt["obs/CLIMATE_GRID"].to_dataset())
    ds_spbias = ds_spbias.compute()

# One panel per variable: tas on the left, pr on the right
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
SpatialBias.plot(ds_spbias.tas, ax=ax[0])
SpatialBias.plot(ds_spbias.pr, ax=ax[1])

# Anchor the output at the repository root (like the catalog path) so saving does
# not depend on the kernel's working directory, and create the folder if missing
plot_dir = git_dir / "CORDEX_eval_scripts/plots"
plot_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_dir / "Spatial_bias.png")

In [None]:
# NOTE(review): this cell repeats the spatial-bias computation of the preceding
# cell (same inputs, same result name) — consider removing one of the two.
from valenspy.diagnostic import SpatialBias

with ProgressBar():
    model_ds = dt["RCM/ERA5/ALARO1_SFX"].to_dataset()
    reference_ds = dt["obs/CLIMATE_GRID"].to_dataset()
    ds_spbias = SpatialBias(model_ds, reference_ds).compute()
