In [134]:
from pathlib import Path
import xarray as xr
import numpy as np
import tiledb
import pandas as pd
from datetime import datetime
import time

In [51]:
# Directory holding the reprojected EUMETSAT NetCDF files, under the user's home directory.
SATELLITE_DATA_PATH = (
    Path.home() / 'data' / 'EUMETSAT' / 'reprojected' / 'just_UK' / '2019' / '06' / '03'
)

In [60]:
# Collect every NetCDF file in the data directory, in sorted path order.
netcdf_filenames = sorted(SATELLITE_DATA_PATH.glob('*.nc'))
num_nc_files = len(netcdf_filenames)
print(f'{num_nc_files} netcdf files found.')

18 netcdf files found.


In [67]:
# Open each NetCDF file and keep only its 'HRV' variable (one DataArray per file),
# in the same order as netcdf_filenames.
data = [xr.open_dataset(filename)['HRV'] for filename in netcdf_filenames]

In [69]:
%%time
# Join the per-file HRV arrays into a single DataArray along the 'time' dimension.
da = xr.concat(data, dim='time')

In [129]:
# Name of the TileDB array that is created and then written to below.
ARRAY_NAME = 'EUMETSAT_ONE_SHOT'

In [116]:
def create_array():
    """Create an empty dense TileDB array shaped like the global ``da``.

    The array is created at ``ARRAY_NAME`` with three zero-based uint16
    index dimensions (``time``, ``y``, ``x``), each spanning the length of
    the matching dimension of ``da``, and one float32 attribute named
    ``HRV``.

    Reads the notebook-level names ``da`` and ``ARRAY_NAME``; returns nothing.
    """
    def _index_dim(name, tile):
        # Zero-based index dimension covering the full length of da[name].
        return tiledb.Dim(
            name=name,
            domain=(0, len(da[name]) - 1),
            tile=tile,
            dtype=np.uint16)

    dom = tiledb.Domain(
        _index_dim("time", tile=4),
        _index_dim("y", tile=32),
        _index_dim("x", tile=32))

    # Single float32 attribute "HRV", compressed with Zstd.
    # BitWidthReductionFilter is deliberately not in the pipeline: TileDB
    # documents it as an integer-only filter, so it does not apply to
    # float32 data.
    attr = tiledb.Attr(
        name="HRV", dtype=np.float32,
        filters=tiledb.FilterList([tiledb.ZstdFilter(level=10)]))

    schema = tiledb.ArraySchema(
        domain=dom, sparse=False,
        attrs=[attr])

    # Create the (empty) array on disk.
    tiledb.DenseArray.create(ARRAY_NAME, schema)

In [117]:
# Create the empty TileDB array on disk; the data is written in the next cell.
create_array()

In [118]:
%%time
# Write data
with tiledb.DenseArray(ARRAY_NAME, mode='w') as array:
    array[:] = da.values

CPU times: user 12.2 s, sys: 6.26 s, total: 18.5 s
Wall time: 19.4 s
