In [43]:
import xscen as xs
import xarray as xr
from xscen.config import CONFIG

# Load the paths file and the settings file into xscen's global CONFIG.
# `verbose` is True whenever this code runs as the main module; `reset=True`
# is passed so the load presumably starts from an empty config rather than
# layering on top of a previous one — confirm against the xscen docs.
config_files = ("../paths_obs.yml", "../config_obs.yml")
xs.load_config(
    *config_files,
    verbose=(__name__ == "__main__"),
    reset=True,
)

2025-07-10 10:03:44 INFO     xscen.config    Updated the config with ../paths_obs.yml.
2025-07-10 10:03:45 INFO     xscen.config    Updated the config with ../config_obs.yml.


In [44]:
# Open the project catalog at the path stored in the loaded configuration,
# and keep a handle on its tabular (DataFrame) view for the inspection below.
catalog_path = CONFIG["paths"]["project_catalog"]
pcat = xs.ProjectCatalog(catalog_path)
df = pcat.df

In [45]:
# Show every column name available in the catalog table.
df.columns.tolist()

['id',
 'type',
 'processing_level',
 'bias_adjust_institution',
 'bias_adjust_project',
 'mip_era',
 'activity',
 'driving_model',
 'institution',
 'source',
 'experiment',
 'member',
 'xrfreq',
 'frequency',
 'variable',
 'domain',
 'date_start',
 'date_end',
 'version',
 'format',
 'path',
 'performance_base']

In [46]:
# Keep only the columns used in the per-processing-level overviews below:
# identification, variables, frequency and time coverage of each entry.
summary_columns = [
    "id",
    "type",
    "processing_level",
    "institution",
    "source",
    "variable",
    "xrfreq",
    "frequency",
    "date_start",
    "date_end",
]
df_trimmed = df[summary_columns]

In [47]:
# List the distinct processing levels present in the catalog, in order of first appearance.
df_trimmed["processing_level"].unique().tolist()

['extracted', 'indicators', 'climatology', 'performance', 'spatial_mean']

## Extracted

In [48]:
# Catalog rows whose processing level is "extracted"; the bare name on the
# last line renders the resulting table as the cell output.
is_extracted = df_trimmed["processing_level"] == "extracted"
extracted = df_trimmed.loc[is_extracted]
extracted

Unnamed: 0,id,type,processing_level,institution,source,variable,xrfreq,frequency,date_start,date_end
0,ECMWF_ERA5-Land_NAM_reconstruction,reconstruction,extracted,ECMWF,ERA5-Land,"(tas, tasmin, tasmax, pr)",D,day,1980-01-01,2025-05-31
1,GovCan_RDRS_NAM_reconstruction,reconstruction,extracted,GovCan,RDRS,"(tas, tasmin, tasmax, pr)",D,day,1980-01-01,2018-12-31
2,GovCan_CaSR_NAM_reconstruction,reconstruction,extracted,GovCan,CaSR,"(tas, tasmin, tasmax, pr)",D,day,1980-01-01,2023-12-31
3,USask_EMDNA_OI_NAM_reconstruction,reconstruction,extracted,USask,EMDNA,"(tas, tasmin, tasmax, pr)",D,day,1980-01-01,2018-12-31
4,PCIC_PCICBlend_CAN_reconstruction,reconstruction,extracted,PCIC,PCICBlend,"(tas, tasmin, tasmax, pr)",D,day,1980-01-01,2012-12-31
5,GovCan_AHCCD_CAN_station-tas,station-tas,extracted,GovCan,AHCCD,"(tas, tasmin, tasmax)",D,day,1980-01-01,2023-12-31
6,GovCan_AHCCD_CAN_station-pr,station-pr,extracted,GovCan,AHCCD,"(pr,)",D,day,1980-01-01,2017-12-31


## Indicators

In [49]:
# Catalog rows whose processing level is "indicators"; the bare name on the
# last line renders the resulting table as the cell output.
is_indicators = df_trimmed["processing_level"] == "indicators"
indicators = df_trimmed.loc[is_indicators]
indicators

Unnamed: 0,id,type,processing_level,institution,source,variable,xrfreq,frequency,date_start,date_end
7,GovCan_AHCCD_CAN_station-pr,station-pr,indicators,GovCan,AHCCD,"(pr_mean_annual,)",YS-JAN,yr,1980-01-01,2017-01-01
8,GovCan_AHCCD_CAN_station-pr,station-pr,indicators,GovCan,AHCCD,"(pr_mean_seasonal,)",QS-DEC,qtr,1980-03-01,2017-12-01
23,ECMWF_ERA5-Land_NAM_reconstruction,reconstruction,indicators,ECMWF,ERA5-Land,"(tx_mean_annual, tg_mean_annual, pr_mean_annua...",YS-JAN,yr,1980-01-01,2025-01-01
24,ECMWF_ERA5-Land_NAM_reconstruction,reconstruction,indicators,ECMWF,ERA5-Land,"(tx_mean_seasonal, tg_mean_seasonal, pr_mean_s...",QS-DEC,qtr,1980-03-01,2025-03-01
25,GovCan_AHCCD_CAN_station-tas,station-tas,indicators,GovCan,AHCCD,"(tg_mean_annual, tx_mean_annual, tn_mean_annual)",YS-JAN,yr,1980-01-01,2023-01-01
26,GovCan_AHCCD_CAN_station-tas,station-tas,indicators,GovCan,AHCCD,"(tx_mean_seasonal, tg_mean_seasonal, tn_mean_s...",QS-DEC,qtr,1980-03-01,2023-12-01
27,GovCan_CaSR_NAM_reconstruction,reconstruction,indicators,GovCan,CaSR,"(tg_mean_annual, pr_mean_annual, tx_mean_annua...",YS-JAN,yr,1980-01-01,2023-01-01
28,GovCan_CaSR_NAM_reconstruction,reconstruction,indicators,GovCan,CaSR,"(tx_mean_seasonal, tg_mean_seasonal, pr_mean_s...",QS-DEC,qtr,1980-03-01,2023-12-01
29,GovCan_RDRS_NAM_reconstruction,reconstruction,indicators,GovCan,RDRS,"(pr_mean_annual, tg_mean_annual, tx_mean_annua...",YS-JAN,yr,1980-01-01,2018-01-01
30,GovCan_RDRS_NAM_reconstruction,reconstruction,indicators,GovCan,RDRS,"(tx_mean_seasonal, tg_mean_seasonal, pr_mean_s...",QS-DEC,qtr,1980-03-01,2018-12-01


## Climatology

In [50]:
# Catalog rows whose processing level is "climatology"; the bare name on the
# last line renders the resulting table as the cell output.
is_climatology = df_trimmed["processing_level"] == "climatology"
climatology = df_trimmed.loc[is_climatology]
climatology

Unnamed: 0,id,type,processing_level,institution,source,variable,xrfreq,frequency,date_start,date_end
9,ECMWF_ERA5-Land_NAM_reconstruction,reconstruction,climatology,ECMWF,ERA5-Land,"(pr_mean_seasonal_clim_mean, pr_mean_seasonal_...",QS-DEC,qtr,1980-03-01,2025-03-01
10,ECMWF_ERA5-Land_NAM_reconstruction,reconstruction,climatology,ECMWF,ERA5-Land,"(pr_mean_annual_clim_std, tg_mean_annual_clim_...",YS-JAN,yr,1980-01-01,2025-01-01
11,GovCan_AHCCD_CAN_station-pr,station-pr,climatology,GovCan,AHCCD,"(pr_mean_seasonal_clim_mean, pr_mean_seasonal_...",QS-DEC,qtr,1980-03-01,2017-12-01
12,GovCan_AHCCD_CAN_station-pr,station-pr,climatology,GovCan,AHCCD,"(pr_mean_annual_clim_std, pr_mean_annual_clim_...",YS-JAN,yr,1980-01-01,2017-01-01
13,GovCan_AHCCD_CAN_station-tas,station-tas,climatology,GovCan,AHCCD,"(tg_mean_seasonal_clim_std, tg_mean_seasonal_c...",QS-DEC,qtr,1980-03-01,2023-12-01
14,GovCan_AHCCD_CAN_station-tas,station-tas,climatology,GovCan,AHCCD,"(tg_mean_annual_clim_mean, tg_mean_annual_clim...",YS-JAN,yr,1980-01-01,2023-01-01
15,GovCan_CaSR_NAM_reconstruction,reconstruction,climatology,GovCan,CaSR,"(pr_mean_seasonal_clim_mean, pr_mean_seasonal_...",QS-DEC,qtr,1980-03-01,2023-12-01
16,GovCan_CaSR_NAM_reconstruction,reconstruction,climatology,GovCan,CaSR,"(pr_mean_annual_clim_std, tg_mean_annual_clim_...",YS-JAN,yr,1980-01-01,2023-01-01
17,GovCan_RDRS_NAM_reconstruction,reconstruction,climatology,GovCan,RDRS,"(tg_mean_seasonal_clim_std, pr_mean_seasonal_c...",QS-DEC,qtr,1980-03-01,2018-12-01
18,GovCan_RDRS_NAM_reconstruction,reconstruction,climatology,GovCan,RDRS,"(tg_mean_annual_clim_mean, pr_mean_annual_clim...",YS-JAN,yr,1980-01-01,2018-01-01


## Performance

In [51]:
# Catalog rows whose processing level is "performance"; the bare name on the
# last line renders the resulting table as the cell output.
is_performance = df_trimmed["processing_level"] == "performance"
performance = df_trimmed.loc[is_performance]
performance

Unnamed: 0,id,type,processing_level,institution,source,variable,xrfreq,frequency,date_start,date_end
35,EMDNA_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,EMDNA,"(tg_mean_annual_rmse,)",fx-yr,,NaT,NaT
36,PCICBlend_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,PCICBlend,"(tg_mean_annual_rmse,)",fx-yr,,NaT,NaT
37,ERA5-Land_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,ERA5-Land,"(tg_mean_annual_rmse,)",fx-yr,,NaT,NaT
38,CaSR_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,CaSR,"(tg_mean_annual_rmse,)",fx-yr,,NaT,NaT
39,RDRS_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,RDRS,"(tg_mean_annual_rmse,)",fx-yr,,NaT,NaT
40,EMDNA_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,EMDNA,"(tg_mean_seasonal_rmse,)",fx-qtr,,NaT,NaT
41,ERA5-Land_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,ERA5-Land,"(tg_mean_seasonal_rmse,)",fx-qtr,,NaT,NaT
42,RDRS_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,RDRS,"(tg_mean_seasonal_rmse,)",fx-qtr,,NaT,NaT
43,PCICBlend_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,PCICBlend,"(tg_mean_seasonal_rmse,)",fx-qtr,,NaT,NaT
44,CaSR_Quebec_GovCan_AHCCD_CAN_station-tas,,performance,,CaSR,"(tg_mean_seasonal_rmse,)",fx-qtr,,NaT,NaT


## Spatial mean

In [52]:
# Catalog rows whose processing level is "spatial_mean"; the bare name on the
# last line renders the resulting table as the cell output.
is_spatial_mean = df_trimmed["processing_level"] == "spatial_mean"
spatial_mean = df_trimmed.loc[is_spatial_mean]
spatial_mean

Unnamed: 0,id,type,processing_level,institution,source,variable,xrfreq,frequency,date_start,date_end
55,RDRS_Quebec_GovCan_AHCCD_CAN_station-tas,,spatial_mean,,RDRS,"(tg_mean_annual_rmse_mean,)",fx-yr,,NaT,NaT
56,PCICBlend_Quebec_GovCan_AHCCD_CAN_station-tas,,spatial_mean,,PCICBlend,"(tg_mean_seasonal_rmse_mean,)",fx-qtr,,NaT,NaT
57,CaSR_Quebec_GovCan_AHCCD_CAN_station-pr,,spatial_mean,,CaSR,"(pr_mean_annual_rmse_mean,)",fx-yr,,NaT,NaT
58,RDRS_Quebec_GovCan_AHCCD_CAN_station-pr,,spatial_mean,,RDRS,"(pr_mean_seasonal_rmse_mean,)",fx-qtr,,NaT,NaT


## Checking periods

In [53]:
# Search the catalog for the yearly CaSR indicator entries, open the matches
# as a dictionary of datasets, and display the dictionary keys.
casr_yearly = pcat.search(
    processing_level="indicators",
    frequency=["yr"],
    source="CaSR",
)
ds_dict = casr_yearly.to_dataset_dict()
ds_dict.keys()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'id.domain.processing_level.xrfreq'


dict_keys(['GovCan_CaSR_NAM_reconstruction.Quebec.indicators.YS-JAN'])

In [54]:
# Select the CaSR yearly-indicators dataset by its catalog key
# (key layout: id.domain.processing_level.xrfreq).
casr_key = "GovCan_CaSR_NAM_reconstruction.Quebec.indicators.YS-JAN"
ds = ds_dict[casr_key]

In [55]:
# Inspect the metadata attributes attached to the `tg_mean_annual` variable.
ds["tg_mean_annual"].attrs

{'cell_methods': 'time: mean (interval: 1 day) time: mean over days',
 'description': 'Annual mean of daily mean temperature.',
 'grid_mapping': 'rotated_pole',
 'history': "[2025-07-02 16:14:06] tg_mean_annual: indicators_obs.TG_MEAN_ANNUAL(tas=tas) with options check_missing=wmo, missing_options={'nm': 11, 'nc': 5} - xclim version: 0.55.1",
 'long_name': 'Mean daily mean temperature',
 'original_variable': 'P_TT_1.5m',
 'standard_name': 'air_temperature',
 'units': '°C',
 'units_metadata': 'temperature: unknown'}

In [56]:
# Run the yearly-indicators search without a source filter, open every match
# as a dataset, and print one dataset key per line.
yearly_indicators = pcat.search(processing_level="indicators", frequency=["yr"])
ds_dict = yearly_indicators.to_dataset_dict()
for dataset_key in ds_dict:
    print(dataset_key)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'id.domain.processing_level.xrfreq'


GovCan_CaSR_NAM_reconstruction.Quebec.indicators.YS-JAN
USask_EMDNA_OI_NAM_reconstruction.Quebec.indicators.YS-JAN
GovCan_RDRS_NAM_reconstruction.Quebec.indicators.YS-JAN
PCIC_PCICBlend_CAN_reconstruction.Quebec.indicators.YS-JAN
ECMWF_ERA5-Land_NAM_reconstruction.Quebec.indicators.YS-JAN
GovCan_AHCCD_CAN_station-pr.Quebec.indicators.YS-JAN
GovCan_AHCCD_CAN_station-tas.Quebec.indicators.YS-JAN
