# TileDB-CF Examples with GOES-R

Use the tools introduced in the previous notebooks to explore some GOES-R data.

In [1]:
import shutil

import tiledb
import tiledb.cf

import netCDF4
import numpy as np
import xarray as xr

In [2]:
! aws s3 cp s3://noaa-goes16/ABI-L1b-RadF/2021/258/14/OR_ABI-L1b-RadF-M6C01_G16_s20212581400226_e20212581409534_c20212581409580.nc ../data/OR_ABI-L1b-RadF-M6C01_G16_s20212581400226_e20212581409534_c20212581409580.nc

download: s3://noaa-goes16/ABI-L1b-RadF/2021/258/14/OR_ABI-L1b-RadF-M6C01_G16_s20212581400226_e20212581409534_c20212581409580.nc to ../data/OR_ABI-L1b-RadF-M6C01_G16_s20212581400226_e20212581409534_c20212581409580.nc


In [3]:
# Local copy of the NetCDF file fetched by the `aws s3 cp` cell above.
example1_input_file = (
    "../data/OR_ABI-L1b-RadF-M6C01_G16_s20212581400226_e20212581409534_c20212581409580.nc"
)

# Group URI for the first example; this is the path the clean-up cell removes.
example1_group_uri = (
    "arrays/ABI-L1b-RadF-M6C01_G16_s20212581400226_e20212581409534_c20212581409580"
)

# Group URI presumably intended for a second example (not used in the cells shown here).
example2_group_uri = (
    "arrays/RadF-M6C01-group"
)

In [4]:
# Clean up output from previous runs so this notebook can be re-run from a fresh kernel.
# On a first run the directory does not exist yet; that is the only failure that is
# safe to ignore. Anything else (a permissions problem, an undefined URI, ...) should
# surface here instead of being silently swallowed by a bare `except`.
try:
    shutil.rmtree(example1_group_uri)
except FileNotFoundError:
    pass

`NetCDF4ConverterEngine.from_file` parameters:

* `input_file`: The input NetCDF file to generate the converter engine from.
* `group_path`: The path to the NetCDF group to copy data from. Use `'/'` for the root group.
* `unlimited_dim_size`: The size of the domain for TileDB dimensions created from unlimited NetCDF dimensions. If `None`, the current size of the NetCDF dimension will be used.
* `dim_dtype`: The numpy dtype for TileDB dimensions.
* `tiles_by_var`: A map from the name of a NetCDF variable to the tiles of the dimensions of the variable in the generated TileDB array.
* `tiles_by_dims`: A map from the name of NetCDF dimensions defining a variable to the tiles of those dimensions in the generated TileDB array.
* `coords_to_dims`: If `True`, convert the NetCDF coordinate variable into a TileDB dimension for sparse arrays. Otherwise, convert the coordinate dimension into a TileDB dimension and the coordinate variable into a TileDB attribute.
* `collect_attrs`: If `True`, store all attributes with the same dimensions in the same array. Otherwise, store each attribute in a scalar array.

In [5]:
# Build a converter engine from the downloaded NetCDF file; only `input_file`
# is passed, so every other `from_file` parameter keeps its default.
converter = tiledb.cf.NetCDF4ConverterEngine.from_file(
    input_file=example1_input_file,
)
# Keep the engine as the last expression so the notebook renders its summary.
converter

0
"NetCDF empty dimension → SharedDim(name=__scalars, domain=(0, 0), dtype='uint64')"
"NetCDFDimension(name=y, size=10848) → SharedDim(name=y, domain=(0, 10847), dtype='uint64')"
"NetCDFDimension(name=x, size=10848) → SharedDim(name=x, domain=(0, 10847), dtype='uint64')"
"NetCDFDimension(name=number_of_time_bounds, size=2) → SharedDim(name=number_of_time_bounds, domain=(0, 1), dtype='uint64')"
"NetCDFDimension(name=number_of_image_bounds, size=2) → SharedDim(name=number_of_image_bounds, domain=(0, 1), dtype='uint64')"
"NetCDFDimension(name=band, size=1) → SharedDim(name=band, domain=(0, 0), dtype='uint64')"
"NetCDFDimension(name=num_star_looks, size=24) → SharedDim(name=num_star_looks, domain=(0, 23), dtype='uint64')"

0
"NetCDF empty dimension → tiledb.Dim(name=__scalars, domain=(0, 0), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=t, dtype='float64', var=False, nullable=False)"
"→ tiledb.Attr(name=goes_imager_projection, dtype='int32', var=False, nullable=False)"
"→ tiledb.Attr(name=y_image, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=x_image, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=nominal_satellite_subpoint_lat, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=nominal_satellite_subpoint_lon, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=nominal_satellite_height, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=geospatial_lat_lon_extent, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=yaw_flip_flag, dtype='int8', var=False, nullable=False)"
"→ tiledb.Attr(name=esun, dtype='float32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=band, size=1) → tiledb.Dim(name=band, domain=(0, 0), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=band_id, dtype='int8', var=False, nullable=False)"
"→ tiledb.Attr(name=band_wavelength, dtype='float32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=num_star_looks, size=24) → tiledb.Dim(name=num_star_looks, domain=(0, 23), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=t_star_look, dtype='float64', var=False, nullable=False)"
"→ tiledb.Attr(name=band_wavelength_star_look, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=star_id, dtype='int16', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=number_of_image_bounds, size=2) → tiledb.Dim(name=number_of_image_bounds, domain=(0, 1), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=y_image_bounds, dtype='float32', var=False, nullable=False)"
"→ tiledb.Attr(name=x_image_bounds, dtype='float32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=number_of_time_bounds, size=2) → tiledb.Dim(name=number_of_time_bounds, domain=(0, 1), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=time_bounds, dtype='float64', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=x, size=10848) → tiledb.Dim(name=x, domain=(0, 10847), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=x.data, dtype='int16', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=y, size=10848) → tiledb.Dim(name=y, domain=(0, 10847), dtype='uint64', tile=None)"

0
"→ tiledb.Attr(name=y.data, dtype='int16', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None

0
"NetCDFDimension(name=y, size=10848) → tiledb.Dim(name=y, domain=(0, 10847), dtype='uint64', tile=226)"
"NetCDFDimension(name=x, size=10848) → tiledb.Dim(name=x, domain=(0, 10847), dtype='uint64', tile=226)"

0
"→ tiledb.Attr(name=Rad, dtype='int16', var=False, nullable=False)"
"→ tiledb.Attr(name=DQF, dtype='int8', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=0
sparse=False
coords_filters=None


In [6]:
# Set desired properties

In [7]:
# Test for issues by creating a schema: `to_schema()` builds the schema from the
# converter's current settings, and the result is displayed as the cell output.
converter.to_schema()

0
"Dim(name='dim', domain=(0, 0), tile='1', dtype='int32')"

0
"Attr(name='attr', dtype='int32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='__scalars', domain=(0, 0), tile='1', dtype='uint64')"

0
"Attr(name='t', dtype='float64', var=False, nullable=False)"
"Attr(name='goes_imager_projection', dtype='int32', var=False, nullable=False)"
"Attr(name='y_image', dtype='float32', var=False, nullable=False)"
"Attr(name='x_image', dtype='float32', var=False, nullable=False)"
"Attr(name='nominal_satellite_subpoint_lat', dtype='float32', var=False, nullable=False)"
"Attr(name='nominal_satellite_subpoint_lon', dtype='float32', var=False, nullable=False)"
"Attr(name='nominal_satellite_height', dtype='float32', var=False, nullable=False)"
"Attr(name='geospatial_lat_lon_extent', dtype='float32', var=False, nullable=False)"
"Attr(name='yaw_flip_flag', dtype='int8', var=False, nullable=False)"
"Attr(name='esun', dtype='float32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='band', domain=(0, 0), tile='1', dtype='uint64')"

0
"Attr(name='band_id', dtype='int8', var=False, nullable=False)"
"Attr(name='band_wavelength', dtype='float32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='num_star_looks', domain=(0, 23), tile='24', dtype='uint64')"

0
"Attr(name='t_star_look', dtype='float64', var=False, nullable=False)"
"Attr(name='band_wavelength_star_look', dtype='float32', var=False, nullable=False)"
"Attr(name='star_id', dtype='int16', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='number_of_image_bounds', domain=(0, 1), tile='2', dtype='uint64')"

0
"Attr(name='y_image_bounds', dtype='float32', var=False, nullable=False)"
"Attr(name='x_image_bounds', dtype='float32', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='number_of_time_bounds', domain=(0, 1), tile='2', dtype='uint64')"

0
"Attr(name='time_bounds', dtype='float64', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='x', domain=(0, 10847), tile='10848', dtype='uint64')"

0
"Attr(name='x.data', dtype='int16', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='y', domain=(0, 10847), tile='10848', dtype='uint64')"

0
"Attr(name='y.data', dtype='int16', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])

0
"Dim(name='y', domain=(0, 10847), tile='226', dtype='uint64')"
"Dim(name='x', domain=(0, 10847), tile='226', dtype='uint64')"

0
"Attr(name='Rad', dtype='int16', var=False, nullable=False)"
"Attr(name='DQF', dtype='int8', var=False, nullable=False)"

0
cell_order=row-major
tile_order=row-major
capacity=10000
sparse=False
coords_filters=FilterList([ZstdFilter(level=-1)])


In [8]:
# Create a TileDB Group from the schema

In [9]:
# Query & plot data from TileDB