## 5. Single Point of Input

We now design the script so that it uses an intake catalog with only kerchunked datasets as input.

In [1]:
%%writefile xpublish_spoi.py

# TLS private key and certificate passed to collection.serve() at the end of
# this script; point both paths at your own files.
ssl_keyfile = "/work/bm0021/k204210/cloudify/workshop/key.pem"
ssl_certfile = "/work/bm0021/k204210/cloudify/workshop/cert.pem"

from cloudify.plugins.stacer import *
from cloudify.utils.daskhelper import *
from cloudify.plugins.kerchunk import *
import xarray as xr
import xpublish as xp
import asyncio
import nest_asyncio
import sys
import os
import intake
def is_port_free(port, host="localhost"):
    """Report whether nothing accepts TCP connections on ``host:port``.

    Args:
        port: TCP port number to probe.
        host: Host name or IPv4 address to probe; defaults to "localhost".

    Returns:
        True if the connection attempt fails (the port is treated as free),
        False if a listener accepted the connection.
    """
    # Imported locally because the script never imports ``socket`` explicitly;
    # without this the name is only available if a star import leaks it.
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        # connect_ex returns 0 on success instead of raising an exception.
        return probe.connect_ex((host, port)) != 0

def find_free_port(start=5000, end=5100, host="localhost"):
    """Return the lowest port in ``start..end`` (inclusive) that is_port_free accepts.

    Returns ``None`` when no port in the range qualifies or the range is empty.
    """
    candidates = range(start, end + 1)
    return next((p for p in candidates if is_port_free(p, host)), None)

# Take the first unused port in 9000-9100 for the service and abort early
# when the whole window is occupied.
port = find_free_port(start=9000, end=9100)
if port is None:
    raise ValueError("Could not find a free port for service")

# Patch asyncio so that event loops can be nested (needed for the
# run_until_complete call further down).
nest_asyncio.apply()
# Storage options forced onto every dataset opened by this script.
STORAGE_OPTIONS = dict(
    cache_size=0
)


def get_options(desc):
    """Build the keyword arguments used to open one catalog entry.

    Args:
        desc: Description of a catalog entry; must provide an ``"args"``
            mapping, which may hold ``"storage_options"``.

    Returns:
        ``{"storage_options": {...}}`` where the entry's own storage options
        are overlaid with STORAGE_OPTIONS (the latter wins on conflicts).
    """
    # Work on a copy: the previous implementation updated the dict stored
    # inside ``desc["args"]`` in place. ``or {}`` also covers an entry whose
    # storage_options is explicitly None.
    storage_options = dict(desc["args"].get("storage_options") or {})
    storage_options.update(STORAGE_OPTIONS)
    return dict(storage_options=storage_options)

if __name__ == "__main__":  # This avoids infinite subprocess creation
    import dask
    # fsspec is needed for the reference mappers below but was never imported
    # explicitly; do not rely on a star import to provide it.
    import fsspec

    # The catalog location is the single command-line argument. Check it
    # before starting the cluster so a missing argument fails fast and clearly.
    if len(sys.argv) < 2:
        sys.exit("usage: python xpublish_spoi.py <intake-catalog-uri>")
    caturi = sys.argv[1]

    # Start the dask cluster and expose its scheduler address through the
    # ZARR_ADDRESS environment variable.
    zarrcluster = asyncio.get_event_loop().run_until_complete(get_dask_cluster())
    os.environ["ZARR_ADDRESS"] = zarrcluster.scheduler._address

    cat = intake.open_catalog(caturi)

    dsdict = {}
    mapper_dict = {}
    for dsname in list(cat):
        desc = cat[dsname].describe()
        testurl = desc["args"].get("urlpath")
        # Only kerchunk reference datasets are served. Entries whose urlpath
        # is missing, not a plain string, or not "reference::..." are skipped.
        if not isinstance(testurl, str) or not testurl.startswith("reference::"):
            continue
        options = get_options(desc)
        ds = cat[dsname](**options).to_dask()

        # Keep the source path across drop_encoding(), which clears the
        # dataset-level encoding.
        sp = ds.encoding["source"]
        ds = ds.drop_encoding()
        ds.encoding["source"] = sp

        mapper_dict[sp] = fsspec.get_mapper(sp, **options)
        dsdict[dsname] = ds

    # Hand the per-source mappers to the kerchunk plugin.
    kp = KerchunkPass()
    kp.mapper_dict = mapper_dict

    collection = xp.Rest(dsdict)
    collection.register_plugin(Stac())
    collection.register_plugin(kp)
    collection.serve(
        host="0.0.0.0",
        port=port,
        ssl_keyfile=ssl_keyfile,
        ssl_certfile=ssl_certfile
    )

Overwriting xpublish_spoi.py


We run this app with an intake catalog for all [kerchunked ERA5 data](https://gitlab.dkrz.de/data-infrastructure-services/era5-kerchunks/-/raw/main/main.yaml).

In [2]:
%%bash --bg
# Activate the conda environment at this path, then start the server script
# with the ERA5 kerchunk catalog URL as its only argument.
source activate /work/bm0021/conda-envs/cloudify
python xpublish_spoi.py https://gitlab.dkrz.de/data-infrastructure-services/era5-kerchunks/-/raw/main/main.yaml

If something goes wrong, you can list the running *cloudify* processes and *kill* the relevant one by its process ID.

In [3]:
# List every process whose command line mentions "cloudify"; the PID is the second column.
!ps -ef | grep cloudify

k204210    52510 4121939  0 10:12 ?        00:00:01 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-c6562e74-6d50-4cf3-92d0-27e4f4cae6f8.json
k204210    56270 4121939  0 10:18 ?        00:00:07 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-e22a5303-3b28-41cb-b092-ed92a8ff6221.json
k204210    67839 4121939  0 10:42 ?        00:00:13 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-33489dce-534b-42ed-bbe7-6d125e3f6167.json
k204210   198403 4121939 11 11:34 ?        00:00:12 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-16ae079b-b9fb-4460-bc7c-808797637e88.json
k204210   200909 4125444  0 11:36 pts/2    00:00:00 /bin

In [31]:
# Replace the PID with the one `ps` reports for the server you want to stop;
# the value below does not appear in the listing above and looks like a
# leftover from an earlier session.
!kill 1104568

In [4]:
# NOTE: the server script takes the first free port in 9000-9100, so change
# `port` here if 9000 was already in use when the server started.
port = 9000
hostname = !echo $HOSTNAME
hosturl = f"https://{hostname[0]}:{port}"
intake_url = f"{hosturl}/intake.yaml"
print(intake_url)

https://l40038.lvt.dkrz.de:9000/intake.yaml


In [5]:
import intake

# Certificate verification is switched off for requests to the server
# (presumably because it uses a self-signed certificate - confirm).
storage_options = {"verify_ssl": False}
cat = intake.open_catalog(intake_url, storage_options=storage_options)
list(cat)

['pressure-level_analysis_daily',
 'pressure-level_analysis_monthly',
 'surface_analysis_daily',
 'surface_analysis_hourly',
 'surface_analysis_monthly',
 'surface_forecast_hourly',
 'surface_forecast_monthly']

In [6]:
# Open one catalog entry as a dask-backed dataset, reusing the storage
# options that were used to open the catalog.
dsname = "pressure-level_analysis_monthly"
ds = (
    cat[dsname](storage_options=storage_options)
    .to_dask()
)
ds

  'dims': dict(self._ds.dims),


Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 150.63 GiB 4.14 MiB Shape (1008, 37, 542080) (1, 1, 542080) Dask graph 37296 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  37  1008,

Unnamed: 0,Array,Chunk
Bytes,150.63 GiB,4.14 MiB
Shape,"(1008, 37, 542080)","(1, 1, 542080)"
Dask graph,37296 chunks in 2 graph layers,37296 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
# Pull one slice of variable "t" (time index 434, level index 35) into memory.
ds["t"].isel(time=434,level=35).load()