# Building a STAC Catalogue in Zarr
This notebook demonstrates how to build a STAC catalogue whose assets are stored in the Zarr format. Zarr is an efficient, scalable format for storing chunked, compressed, N-dimensional arrays. The notebook guides you through the steps to create, save, reload, and query a STAC catalogue that references a Zarr store.

In [142]:
import earthkit.data

## STAC catalogue
In this section we will explore how to build a STAC catalogue and what its main features are.

In [143]:
import os
import json
import rasterio
import urllib.request
import pystac

from datetime import datetime, timezone
from shapely.geometry import Polygon, mapping
from tempfile import TemporaryDirectory

In [144]:
# Scratch directory that will hold the Zarr store written below
tmp_dir = TemporaryDirectory()
tmp_path = os.path.join(tmp_dir.name, 'example.zarr')

# Parameters of the ERA5 single-levels request sent to the CDS
era5_request = dict(
    variable=["2m_temperature"],
    product_type="reanalysis",
    area=[46, 6, 44, 8],  # presumably [north, west, south, east] -- confirm against the CDS docs
    grid=[0.25, 0.25],
    date=['2004-01-01', '2004-01-02', '2004-01-03'],
    time=["00:00", "03:00", "06:00", "09:00", "12:00", "15:00", "18:00", "21:00"],
)
ds = earthkit.data.from_source("cds", "reanalysis-era5-single-levels", **era5_request)

# Keep only the grid point nearest to (lat=45, lon=7) ...
point_series = ds.to_xarray().sel(latitude=45, longitude=7, method='nearest')

# ... then rechunk so the whole time axis is a single chunk and write it to Zarr
point_series.chunk(chunks={'time': -1}).to_zarr(tmp_path)

2024-10-15 14:22:07,158 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-10-15 14:22:07,161 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-10-15 14:22:07,162 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**


<xarray.backends.zarr.ZarrStore at 0x20dc57730c0>

In [145]:
# Root STAC catalogue that will hold the collection (and, through it, the item)
catalog = pystac.Catalog(
    id='tutorial-catalog',
    description='This catalog is a basic demonstration catalog utilizing a file from CDS ERA5 on single levels.',
)

In [146]:
# Now we need to populate the stac catalog attributes

In [147]:
# STAC Item describing the single grid point stored in the Zarr asset.
# The temporal fields describe the requested data (2004-01-01 00:00 to
# 2004-01-03 21:00, assumed UTC) rather than the time the notebook was run,
# so the metadata is reproducible and searchable by the data's own dates.
item = pystac.Item(id='era5-prova',
                 geometry={
                     "type": "Point",
                     "coordinates": [7., 45.]  # GeoJSON order: [longitude, latitude]
                 },
                 bbox=[7., 45., 7., 45.],  # degenerate bbox: the item is a single point
                 datetime=datetime(2004, 1, 1, 0, 0, tzinfo=timezone.utc),
                 start_datetime=datetime(2004, 1, 1, 0, 0, tzinfo=timezone.utc),
                 end_datetime=datetime(2004, 1, 3, 21, 0, tzinfo=timezone.utc),
                 properties={})

In [148]:
# Display the item as the cell's output
item

In [149]:
# Add Asset and all its information to Item 
item.add_asset(
    key='image',
    asset=pystac.Asset(
        href=tmp_path,
        media_type="application/vnd+zarr",
        roles=["data"]
    )
)

In [150]:
# Collection grouping the ERA5 temperature items.
# The temporal extent matches the data actually requested from the CDS
# (2004-01-01 00:00 to 2004-01-03 21:00, assumed UTC); the previous hard-coded
# 2020 interval did not cover the data held by the collection.
collection = pystac.Collection(
    id="era5-temperature-zarr",
    description="ERA5 reanalysis temperature data in Zarr format",
    extent=pystac.Extent(
        # NOTE(review): global bbox kept as-is; narrow it if the collection
        # should only advertise the stored grid point.
        spatial=pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]),
        temporal=pystac.TemporalExtent([[
            datetime(2004, 1, 1, 0, 0, tzinfo=timezone.utc),
            datetime(2004, 1, 3, 21, 0, tzinfo=timezone.utc),
        ]])
    ),
    license="CC-BY-4.0"
)



In [151]:
# Build the hierarchy: the item goes into the collection,
# and the collection becomes a child of the root catalogue.
collection.add_item(item)
catalog.add_child(collection)


In [152]:
# Save the STAC catalogue locally, under the "era5_zarr_catalog" directory
catalog.normalize_and_save(
    "era5_zarr_catalog",
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
)

In [153]:
# NOTE(review): cleanup is disabled -- presumably because the item's asset href
# (tmp_path) lives inside tmp_dir, so removing it would leave the href dangling.
# Uncomment once the Zarr store is no longer needed.
#tmp_dir.cleanup()

In [154]:
# Reload the catalogue from the file written by normalize_and_save
catalog = pystac.Catalog.from_file('era5_zarr_catalog/catalog.json')

In [155]:
# Explore the contents of the catalogue: print id, geometry and assets of every item.
# A dedicated loop variable is used so the notebook-level `item` created earlier
# is not overwritten by the iteration.
for catalog_item in catalog.get_all_items():
    print(catalog_item.id, catalog_item.geometry, catalog_item.assets)

era5-prova {'type': 'Point', 'coordinates': [7.0, 45.0]} {'image': <Asset href=C:/Users/jacop/AppData/Local/Temp/tmp48wqiost/example.zarr>}


In [156]:
# Same listing as a labelled report, one field per line.
# A dedicated loop variable keeps the notebook-level `item` intact.
for catalog_item in catalog.get_all_items():
    print(f"Item ID: {catalog_item.id}")
    print(f"Geometry: {catalog_item.geometry}")
    print(f"Assets: {catalog_item.assets}")

Item ID: era5-prova
Geometry: {'type': 'Point', 'coordinates': [7.0, 45.0]}
Assets: {'image': <Asset href=C:/Users/jacop/AppData/Local/Temp/tmp48wqiost/example.zarr>}


In [157]:
# Alternative (not used here): open the catalogue through pystac_client.
# from pystac_client import Client
# from shapely.geometry import Point
#
# # Load the local STAC catalogue (replace with the path to your catalogue)
# catalog = Client.open("era5_zarr_catalog/catalog.json")

from rtree import index
from shapely.geometry import shape, Point

# Create a spatial index (R-tree) over the items' bounding boxes
spatial_index = index.Index()

# Populate the index with the catalogue items.
# item_map:        index id -> STAC item
# item_geometries: index id -> shapely geometry, kept for the exact test below
item_map = {}
item_geometries = {}
for idx, stac_item in enumerate(catalog.get_all_items()):
    if stac_item.geometry:  # items without a geometry cannot be indexed
        item_geometry = shape(stac_item.geometry)
        spatial_index.insert(idx, item_geometry.bounds)
        item_map[idx] = stac_item
        item_geometries[idx] = item_geometry

# Define the grid point to look up
latitude = 45.
longitude = 7.
point_of_interest = Point(longitude, latitude)

# Coarse pass: the R-tree only compares bounding boxes, so it yields candidates
candidate_indices = spatial_index.intersection((longitude, latitude, longitude, latitude))

# Exact pass: keep only candidates whose actual geometry touches the point
matching_indices = [
    idx for idx in candidate_indices
    if item_geometries[idx].intersects(point_of_interest)
]

# Retrieve the corresponding items
matching_items = [item_map[idx] for idx in matching_indices]

# Show the items that were found
for matched_item in matching_items:
    print(f"Item ID: {matched_item.id}")
    print(f"Assets: {matched_item.assets}")

Item ID: era5-prova
Assets: {'image': <Asset href=C:/Users/jacop/AppData/Local/Temp/tmp48wqiost/example.zarr>}
