In [53]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
from odc.stac import configure_rio
from dask_jobqueue import SLURMCluster
from dask.distributed import Client as daskClient
# from dask import compute as dask_compute
# from xarray import open_zarr as xr_open_zarr

from stacathome import MaxiCube

# Configure GDAL/rasterio with odc-stac's cloud defaults and request unsigned
# (anonymous) AWS access, so no credentials are needed.
# NOTE(review): no Dask client is passed, so this presumably configures only the
# notebook process — confirm the SLURM workers created later get the same settings.
configure_rio(cloud_defaults=True, aws={"aws_unsigned": True})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Construct Cube Object

In [54]:
# Area of interest and the bands to store in the cube.
aoi = 'somalia'
requested_bands = ['B02', 'B03', 'B04', 'B8A']
# Grid resolution and CRS handed to MaxiCube. The trailing values are the
# alternatives left by the author; presumably 20 pairs with EPSG:32737 and
# 0.00018 with EPSG:4326 — TODO confirm.
resolution = 0.00018  # 20  # 0.00018
crs = 4326  # 32737  # 4326
# Chunk sizes handed to MaxiCube for the spatial (x/y) and the time dimension.
chunksize_xy = 256
chunksize_t = 1000

# Directory passed to MaxiCube as `path`; created here if it does not exist yet.
# NOTE(review): absolute, site-specific path — adjust when running elsewhere.
out_path = '/Net/Groups/BGI/data/DataStructureMDI/DATA/Incoming/Sentinel/_2/S2A_L2A/ForSites/Sentinel2tiles'
os.makedirs(out_path, exist_ok=True)

# Location passed to MaxiCube as `zarr_path`. Per the recorded output, an
# existing store at this path is kept ("Skipping creation") unless
# overwrite=True is set.
zarr_store = '/Net/Groups/BGI/scratch/mzehner/VCI_Somalia/Somalia_S2_2014_2026.zarr'

# Build the cube object from the settings above.
mxc = MaxiCube(aoi=aoi,
               requested_bands=requested_bands,
               crs=crs,
               resolution=resolution,
               chunksize_xy=chunksize_xy,
               chunksize_t=chunksize_t,
               path=out_path,
               zarr_path=zarr_store,
               )

# Optional clean-up (disabled): handles scenes that are no longer present but
# are still stored in the item file.
# mxc.req_items = mxc.items_local_global
# mxc.compare_local(report=True)
# mxc.items_local_global = mxc.req_items_local
# mxc.req_items = mxc.items_local_global
# mxc.compare_local(report=True)

# Optional persistence of the item list / cube object (disabled).
# mxc.save_items()
# mxc.save()
# Print the item counts (requested, requested-and-local, local).
mxc.status()
# Optional refresh of the local asset list (disabled).
# local_ = mxc.local_assets(rerequest=True)
# mxc.items_local_global = local_
# len(local_)

Loading local assets
Zarr already exists at /Net/Groups/BGI/scratch/mzehner/VCI_Somalia/Somalia_S2_2014_2026.zarr. Skipping creation. Set overwrite=True to overwrite.
Items requested: 0, Items requestedlocal : 0, Items local: 1242


# Or load from disk

In [None]:
from stacathome import load_maxicube

# Alternative to constructing the cube: restore a previously saved MaxiCube.
# os.path.join keeps the path valid even if out_path ends with a separator.
# NOTE(review): 'saved.maxicube' is assumed to be the file mxc.save() writes
# into out_path — confirm.
mxc_loaded = load_maxicube(os.path.join(out_path, 'saved.maxicube'))

In [61]:
# Print the current item counts (requested, requested-and-local, local).
mxc.status()

Items requested: 1176, Items requestedlocal : 910, Items local: 1996


# Plot locally available data

In [70]:
# Plot the locally available data.
# NOTE(review): subset_chunks_by=50 presumably thins out the chunks that are
# drawn — confirm against the stacathome API.
mxc.plot(subset_chunks_by=50)

# Parallel request and download of tiles using SLURM

In [19]:
# Dask-on-SLURM cluster for the parallel tile download.
# Every SLURM job hosts one single-core worker process.
cluster = SLURMCluster(
    queue='work',          # SLURM queue to submit to
    walltime='03:00:00',   # per-job duration (hh:mm:ss)
    memory='768MB',        # memory per job
    cores=1,               # cores per job
    processes=1,           # worker processes per job
)

# Adaptive scaling between 0 and 4 workers; the fixed-size alternative would be
# cluster.scale(jobs=8).
cluster.adapt(minimum=0, maximum=4)

# Client connected to this cluster; it is what the download call receives.
client = daskClient(cluster)

# Last expression: render the cluster summary as the cell output.
cluster

Perhaps you already have a cluster running?
Hosting the HTTP server on port 37603 instead


0,1
Dashboard: http://10.0.30.56:37603/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.0.30.56:33007,Workers: 0
Dashboard: http://10.0.30.56:37603/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [21]:
# Request and download everything between the two dates on the Dask client.
# The tuple selects the subset by point; the recorded log reads
# "subset by lat lon", i.e. (8.89, 48.53) is (lat, lon). enlarge_by_n_chunks=0
# adds no neighbouring chunks. The return value is kept for the follow-up
# update call.
process = mxc.download_all('2015-01-01', '2024-10-31', subset=(8.89, 48.53), enlarge_by_n_chunks=0, client=client)

subset by lat lon
8100
Updated local items, 1243 items


Once the download has finished, run the update call on the processed items:

In [69]:
# Feed the result of download_all back into the cube's record of local items
# (the recorded output reports the updated item count).
# NOTE(review): this calls an underscore-prefixed (private) method; check
# whether stacathome offers a public equivalent.
mxc._update_items_local_global(process)


Updated local items, 2152 items


In [60]:
# Remove a tile's files from the current mxc object; optionally also delete them from disk.
# mxc.remove_tile('38PLU', delete_assets=False)

Items requested: 0, Items requestedlocal : 0, Items local: 1242


In [18]:
# Shut down the Dask client first, then the SLURM cluster behind it.
client.close()
cluster.close()

In [9]:
# Persist the cube object; saving only the item list is the disabled alternative.
# mxc.save_items()
mxc.save()

Updated local items, 2575 items


# Load the requested data as on-the-fly xarray

In [None]:
# Build the on-the-fly xarray cube for subset 6, enlarged by 5 chunks.
# NOTE(review): an integer `subset` is presumably a chunk id — confirm against
# the stacathome API.
otf_cube = mxc.load_otf_cube(subset=6, enlarge_by_n_chunks=5)
# Last expression: display the cube summary.
otf_cube

In [None]:
# Average B02 over both spatial dimensions and plot what remains (presumably a
# time series). Dimension names are looked up on the cube, not hard-coded.
spatial_dims = [mxc.dimension_names[axis] for axis in ('longitude', 'latitude')]
otf_cube.B02.mean(dim=spatial_dims).plot()

In [None]:
# Replace zeros in B02 with NaN (presumably 0 is the fill value), take the
# median over time and divide by 10000 before plotting.
b02 = otf_cube.B02
b02_median = b02.where(b02 != 0, np.nan).median(dim='time')
(b02_median / 10000).plot()

# Insert data into a larger consistent cube (TODO: check out icechunk?)

In [None]:
# Dask-on-SLURM cluster for filling the large cube.
# Every SLURM job hosts one single-core worker process with 4GB of memory.
cluster = SLURMCluster(
    name='mz-worker',      # name of the Dask worker jobs
    queue='work',          # SLURM queue to submit to
    walltime='03:00:00',   # per-job duration (hh:mm:ss)
    memory='4GB',          # memory per job
    cores=1,               # cores per job
    processes=1,           # worker processes per job
)

# Adaptive scaling between 0 and 20 workers.
cluster.adapt(minimum=0, maximum=20)

# Client connected to this cluster; it is what fill_large_cube receives.
client = daskClient(cluster)

# Last expression: render the cluster summary as the cell output.
cluster

In [20]:
# Check the local assets for read errors before they are written into the
# large cube.
mxc.check_assets_for_read_errors()

Updated local items, 2431 items


100%|██████████| 122/122 [00:00<00:00, 12945.38it/s]
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


In [None]:
# Insert the local data into the large cube using the Dask client.
# NOTE(review): the content of the return value kept in `res` is not shown in
# this notebook.
res = mxc.fill_large_cube(client=client)

In [62]:
# Shut down the Dask client first, then the SLURM cluster behind it.
client.close()
cluster.close()

# Addressing the large dataset as a minicube

In [54]:
# Fetch chunk 0 for 2021-01-01 .. 2021-01-31 and load it into memory with
# .compute().
mc_int = mxc.get_chunk(0, ('2021-01-01', '2021-01-31')).compute()
# Last expression: display the minicube summary.
mc_int

In [None]:
# Average B02 over both spatial dimensions of the minicube and plot what
# remains (presumably a time series). Dimension names come from the cube.
spatial_dims = [mxc.dimension_names[axis] for axis in ('latitude', 'longitude')]
mc_int.B02.mean(dim=spatial_dims).plot()

In [None]:
def ndvi(band_red, band_nir):
    """Return the normalized difference ``(nir - red) / (nir + red)``.

    Args:
        band_red: Red band values (scalar or array-like supporting arithmetic).
        band_nir: Near-infrared band values, compatible with ``band_red``.

    Returns:
        The element-wise ratio of the band difference to the band sum. No guard
        is applied where the sum is zero; the operands' own division semantics
        decide the result there.
    """
    difference = band_nir - band_red
    total = band_nir + band_red
    return difference / total


# Set everything to NaN where B02 is 0, then drop time steps that are NaN
# throughout.
b02_nonzero = mc_int.B02 != 0
mc_float = mc_int.where(b02_nonzero, np.nan).dropna(dim='time', how='all')

# NDVI from the temporal means of B04 (passed as red) and B8A (passed as NIR),
# shown as an image.
red_mean = mc_float.B04.mean(dim='time')
nir_mean = mc_float.B8A.mean(dim='time')
ndvi(red_mean, nir_mean).plot.imshow()

In [None]:
# Same chunk and date range as the plain minicube, but requested with
# drop_fill=True and loaded into memory.
january_2021 = ('2021-01-01', '2021-01-31')
mc_drop_fill = mxc.get_chunk(0, january_2021, drop_fill=True).compute()
mc_drop_fill

# Handling new requests and loading local data as otf cube

In [None]:
# Start a fresh request (new_request=True) for subset 750 over the given date
# range.
mxc.request_items('2015-01-01', '2026-01-31', subset=750, new_request=True)

In [None]:
# Load the chunk containing the test point as an on-the-fly cube from the
# requested items that are available locally, then show B02 at the first time
# step.
# The point is (lat, lon) = (8.89, 48.53), the same order the download cell
# passes (its log reads "subset by lat lon"). Swapping the two values would put
# the latitude far outside the 'somalia' AOI.
# NOTE(review): confirm the tuple order against mxc.subset in stacathome.
otf_cube = mxc.load_otf_cube(mxc.subset(lat_lon=(8.89, 48.53))[0], mxc.req_items_local, drop_fill=True)
otf_cube.isel(time=0).B02.plot.imshow()

In [None]:
# Subset used for both the request and the on-the-fly load.
subset = 1070
# Start a fresh request (new_request=True) for that subset and date range.
mxc.request_items('2021-01-01', '2024-05-01', subset=subset, new_request=True)
# Load the same subset on the fly from the requested items that are available
# locally, without enlarging by neighbouring chunks.
otf_cube = mxc.load_otf_cube(subset=subset, items=mxc.req_items_local, enlarge_by_n_chunks=0, drop_fill=True)