# NetCDF to TileDB

1. Use `dask.array.to_tiledb()`

In [None]:
import xarray as xr

# Write the `t2m` variable of the 1985 NetCDF file to a TileDB array via dask.
# The dataset is opened in a context manager so the NetCDF file handle is
# released as soon as the write has finished (it was previously left open).
uri = 'data/tiledb/2m_temperature_1985'
with xr.open_dataset('/data/era5/raw/2m_temperature/2m_temperature-1985.nc') as ds:
    # Re-chunk along "time" in blocks of 24*10 steps
    # (presumably 10 days of hourly data — confirm against the file).
    chunked = ds.chunk({"time": 24*10})
    # `.data` is the dask array backing the variable; with the default
    # arguments `to_tiledb` performs the write before returning, so the
    # source file is still open while it is being read.
    chunked['t2m'].data.to_tiledb(uri)

2. Use `tiledb.cf.NetCDF4ConverterEngine`

In [None]:
# use tiledb.cf
import tiledb
import tiledb.cf
import numpy as np

# Source NetCDF file and destination TileDB group for the conversion.
netcdf_file = "/data/era5/raw/2m_temperature/2m_temperature-1985.nc"
group_uri = "data/tiledb/2m_temperature_1985_group"

# Let tiledb.cf derive the conversion plan directly from the NetCDF file.
# (Optional, currently unused: pass dim_dtype=np.uint16 to from_file.)
converter = tiledb.cf.NetCDF4ConverterEngine.from_file(netcdf_file)

# Last expression of the cell: displays the generated converter.
converter

In [None]:
# Fetch the array creator that owns each attribute of interest.
array_time, array_lat, array_lon, array_t2m = (
    converter.get_array_creator_by_attr(attr_name)
    for attr_name in ("time.data", "latitude.data", "longitude.data", "t2m")
)

# Give every array a short, readable name inside the output group.
array_time.name = "time"
array_lat.name = "latitude"
array_lon.name = "longitude"
array_t2m.name = "t2m"

# Tile extents for the t2m array: 31*24 along the first dimension and
# 721 x 1440 along the other two (presumably time, latitude, longitude —
# confirm against the converter output). An earlier experiment used
# (500, 500, 500).
array_t2m.domain_creator.tiles = (31 * 24, 721, 1440)

# Run the conversion and write the arrays into the TileDB group.
converter.convert_to_group(group_uri)

# TileDB Query

In [None]:
import tiledb

In [None]:
# Open the "t2m" array inside the converted group, read every index of the
# first dimension at position (0, 0) of the other two, and print the shape
# of the resulting 't2m' values. (Use print(A.schema) to inspect the schema.)
t2m_array_uri = f'{group_uri}/t2m'
with tiledb.open(t2m_array_uri) as A:
    point_series = A[:, 0, 0]['t2m']
    print(point_series.shape)

In [None]:
# Open the array at the same URI that the dask `to_tiledb` cell wrote to.
# The handle is kept open for the cells that follow and is closed explicitly
# by the `A.close()` cell further down.
uri = 'data/tiledb/2m_temperature_1985'
A = tiledb.open(uri)
# Alternative queries kept for reference:
# q = A.query(attrs=(A.attr(0).name,), coords=True)
# q = A.query(cond="attr('')>265", coords=True)
# q[:]

In [None]:
# Grab the attribute at index 0 of the open array `A`.
attr_t2m = A.attr(0)

In [None]:
# Display the attribute's name (shown as the cell's last expression).
attr_t2m.name

In [None]:
# Build a query on `A` with the condition "attr('') > 264".
# attr('') presumably addresses the array's unnamed attribute — compare with
# the value shown by `attr_t2m.name` to confirm.
# A plain string is used: the literal has no placeholders, so the f-prefix
# was unnecessary.
q = A.query(cond="attr('')>264")

In [None]:
# Run the conditioned query for index 0 of the first dimension and display
# the shape of what comes back.
# NOTE(review): assumes the result exposes `.shape` directly (i.e. is an
# array rather than a dict of attributes) — confirm.
q[0].shape

In [None]:
# Release the array handle that was opened with tiledb.open(uri) above.
A.close()

3. Read via `dask.array.from_tiledb()`

In [None]:
import dask.array as da
# Load the TileDB array written by the `to_tiledb` cell (same URI) as a
# dask array.
uri = 'data/tiledb/2m_temperature_1985'
tdb_ar = da.from_tiledb(uri)