# Use the DKRZ cloud (Swift) catalog

In [None]:
# Direct locations of the CMIP6 cloud catalog file (a filesystem path and an
# HTTPS URL), kept for reference. The cells that follow open the master
# catalog instead and navigate from there to the cloud catalog.
cat_file_dkrz = "/pool/data/Catalogs/dkrz_cmip6_cloud.json"
cat_file_public = "https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/dkrz_cmip6_cloud.json"

In [None]:
import numpy as np                    # fundamental package for scientific computing
import pandas as pd                   # data analysis and manipulation tool
import xarray as xr                   # handling labelled multi-dimensional arrays
import intake                         # to find data in a catalog, this notebook explains how it works

## Open DKRZ master catalog

In [None]:
# Open the DKRZ master catalog from its YAML file, then show the names of
# the entries it contains as the cell output.
dkrz_catalog = intake.open_catalog(
    "/pool/data/Catalogs/dkrz_catalog.yaml"
)
list(dkrz_catalog)

### Navigate to cloud catalog

In [None]:
# Select the entry named "dkrz_cmip6_cloud" from the master catalog via
# attribute access and print it.
dkrz_cloud = dkrz_catalog.dkrz_cmip6_cloud
print(dkrz_cloud)

In [None]:
# Preview the first rows of the catalog table; n=5 is the default of head().
# NOTE(review): assumes dkrz_cloud.df is a pandas DataFrame - confirm.
dkrz_cloud.df.head(n=5)

### Query the cloud catalog

In [None]:
# Search terms used to narrow down the cloud catalog.
query = {
    "source_id": "MPI-ESM1-2-HR",  # the model
    "variable_id": "pr",
    "table_id": "day",
    "experiment_id": "historical",
}

# Search the cloud catalog with the query defined above.
cat = dkrz_cloud.search(**query)

# Optional: the full catalog can be deleted here to free memory once it is
# no longer needed (left disabled).
# del dkrz_cloud

# Display the table of search results as the cell output.
cat.df

## Access the data

In [None]:
# Turn the search results into a dictionary via to_dataset_dict(); the two
# keyword dictionaries are passed through to it unchanged.
# Options that were left disabled in zarr_kwargs: decode_times=True,
# use_cftime=True.
xr_dict = cat.to_dataset_dict(
    cdf_kwargs={"chunks": {"time": 1}},
    zarr_kwargs={"consolidated": True},
)
xr_dict

In [None]:
# Take the most recently inserted dataset from the dictionary WITHOUT
# removing it. The previous popitem() call mutated xr_dict, so re-running
# this cell picked a different dataset each time and raised KeyError once
# the dictionary was empty. Selecting the last value returns the same item
# popitem() would have, while keeping the cell safe to re-run.
# NOTE(review): assumes xr_dict is a plain (insertion-ordered) dict - confirm.
xr_dset = list(xr_dict.values())[-1]
xr_dset

In [None]:
# Plot the "pr" variable at the first index along the time dimension.
xr_dset["pr"].isel(time=0).plot()