# Create Zarr

This code creates a Zarr store from the collection of `.tiff` files previously downloaded. Zarr is a chunked array storage format that is much more efficient to work with than a large number of individual image files.

In [1]:
from pathlib import Path
import datetime
import pandas as pd
import xarray as xr
import rioxarray as rioxr
import dask
import zarr

In [2]:
# Folder (relative to the working directory) that holds the downloaded tif files
_tif_folder = 'Data/BangladeshTimeSeries/'
dataPath = Path(_tif_folder)

In [3]:
# Where the Zarr output is written: a folder next to the tif folder.
zarrOutput = dataPath.parent / 'BangladeshTimeSeriesZarr'
# exist_ok keeps this cell re-runnable: a bare mkdir() raises FileExistsError
# when the folder is left over from an earlier (possibly failed) run.
# parents=True also creates any missing parent folder.
zarrOutput.mkdir(parents=True, exist_ok=True)

In [4]:
# Find all '.tiff' files in the data source path.
# glob() yields entries in arbitrary order, so sort them by the integer
# encoded in each file stem (the same value the dates are derived from);
# this keeps anything built from `files` in chronological order.
files = sorted(dataPath.glob('*.tiff'), key=lambda path: int(path.stem))
# Fail early with a clear message rather than with an opaque error later on
# when there is nothing to concatenate.
if not files:
    raise FileNotFoundError(f"No '*.tiff' files found in {dataPath}")

In [5]:
# Each file stem is read as a Unix timestamp in milliseconds.
# Step 1: convert the stems to seconds since the epoch.
epoch_seconds = [int(tif.stem) / 1000.0 for tif in files]
# Step 2: build UTC-aware pandas Timestamps and collect them into an index.
# NOTE(review): the resulting index is timezone-aware (UTC) - confirm the
# installed xarray version stores tz-aware datetimes as intended.
dates = pd.DatetimeIndex([
    pd.Timestamp(datetime.datetime.fromtimestamp(seconds, tz=datetime.timezone.utc))
    for seconds in epoch_seconds
])

In [8]:
# Wrap the dates in an xarray Variable whose single dimension is named 'time'
time = xr.Variable(dims='time', data=dates)

In [9]:
# Open every tif file with rioxarray and stack the results along the new
# 'time' dimension, giving one xarray DataArray for the whole series.
# NOTE(review): the None chunk sizes presumably keep each dimension of a file
# in a single chunk - confirm against the rioxarray/dask documentation.
chunks = dict.fromkeys(('x', 'y', 'band'))
rasters = [rioxr.open_rasterio(tif, chunks=chunks) for tif in files]
data = xr.concat(rasters, dim=time)

In [10]:
# Write the concatenated data into the output folder as a Zarr store
data.to_zarr(store=zarrOutput)

<xarray.backends.zarr.ZarrStore at 0x17db21ee0>