# Loading recent Sentinel-2 data using STAC and Dask


In [1]:
import pystac_client
import odc.stac
from datetime import datetime, timedelta

import os
import dask
from dask.distributed import LocalCluster, Client

# Open the DEA Explorer STAC catalogue; later cells use this client to search
# for data.
stac_client = pystac_client.Client.open("https://explorer.dea.ga.gov.au/stac")

# Reading DEA data over STAC requires access to DEA's Amazon S3 buckets to be
# configured first: apply the cloud defaults and use unsigned (anonymous)
# requests. This configuration is needed for any DEA data loaded via STAC.
odc.stac.configure_rio(cloud_defaults=True, aws={"aws_unsigned": True})

# Point the Dask dashboard link at the JupyterHub proxy path. When the
# JUPYTERHUB_SERVICE_PREFIX variable is not set, the prefix falls back to "/".
prefix = os.getenv("JUPYTERHUB_SERVICE_PREFIX", "/")
dashboard_link = prefix + "proxy/{port}/status"
dask.config.set({"distributed.dashboard.link": dashboard_link})

# Launch a local Dask cluster: two single-threaded worker processes.
# memory_limit=0 is reported by the cluster as "0 B" total memory
# (presumably meaning no per-worker memory cap -- confirm in the Dask docs).
cluster = LocalCluster(
    n_workers=2,
    threads_per_worker=1,
    memory_limit=0,
    processes=True,
)
client = Client(cluster)

# Display the client summary (including the dashboard link) as the cell output
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /user/robbi.bishoptaylor@ga.gov.au/proxy/8787/status,

0,1
Dashboard: /user/robbi.bishoptaylor@ga.gov.au/proxy/8787/status,Workers: 2
Total threads: 2,Total memory: 0 B
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35019,Workers: 2
Dashboard: /user/robbi.bishoptaylor@ga.gov.au/proxy/8787/status,Total threads: 2
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:38845,Total threads: 1
Dashboard: /user/robbi.bishoptaylor@ga.gov.au/proxy/40435/status,Memory: 0 B
Nanny: tcp://127.0.0.1:34121,
Local directory: /tmp/dask-scratch-space/worker-u4qj4cq2,Local directory: /tmp/dask-scratch-space/worker-u4qj4cq2

0,1
Comm: tcp://127.0.0.1:46697,Total threads: 1
Dashboard: /user/robbi.bishoptaylor@ga.gov.au/proxy/43161/status,Memory: 0 B
Nanny: tcp://127.0.0.1:44221,
Local directory: /tmp/dask-scratch-space/worker-6wmht5xd,Local directory: /tmp/dask-scratch-space/worker-6wmht5xd


## Configure analysis

In [2]:
# Spatial bounding box to load, in longitude/latitude degrees.
# STAC expects the order [min_x, min_y, max_x, max_y], i.e.
# [west, south, east, north], so the southern latitude (-20) must come
# before the northern latitude (-10).
bbox = [123, -20, 146, -10]

# Number of days to look back from the current date when searching for data
time_window = 2

# Bands to load. Both SWIR bands are needed for the index calculated later in
# the notebook; for a true-colour composite, load
# "nbart_red", "nbart_green" and "nbart_blue" instead.
bands = ["nbart_swir_2", "nbart_swir_3"]

## Search for data

In [3]:
# Build the STAC datetime range "YYYY-MM-DD/YYYY-MM-DD" covering the last
# `time_window` days. datetime.now() reads the kernel's local clock, so the
# range (and therefore the search results) changes from run to run.
end = datetime.now()
start = end - timedelta(days=time_window)
time = f"{start:%Y-%m-%d}/{end:%Y-%m-%d}"

# Search both Sentinel-2 ARD collections for items that intersect the
# bounding box within the time range
query = stac_client.search(
    collections=["ga_s2am_ard_3", "ga_s2bm_ard_3"],
    bbox=bbox,
    datetime=time,
)

# Materialise the results so they can be counted and passed on for loading
items = list(query.items())
print(f"Items found: {len(items)}")

Items found: 223


## Load data

In [4]:
# Options describing the output grid and how the STAC items are combined:
# - bbox / measurements: spatial extent and bands chosen in the config cell
# - crs / resolution: output projection and pixel size (in CRS units)
# - groupby: items sharing a "solar_day" are merged into one time step
# - chunks: Dask chunk sizes, which keeps the load lazy
load_options = dict(
    bbox=bbox,
    measurements=bands,
    crs="EPSG:3577",
    groupby="solar_day",
    resolution=100,
    chunks={"x": 2048, "y": 2048},
)

# Build the lazily-loaded xarray dataset; no pixel data is read at this point
ds = odc.stac.load(items, **load_options)

# Display the dataset summary as the cell output
ds

Unnamed: 0,Array,Chunk
Bytes,2.20 GiB,16.00 MiB
Shape,"(2, 11401, 25892)","(1, 2048, 2048)"
Dask graph,156 chunks in 1 graph layer,156 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.20 GiB 16.00 MiB Shape (2, 11401, 25892) (1, 2048, 2048) Dask graph 156 chunks in 1 graph layer Data type float32 numpy.ndarray",25892  11401  2,

Unnamed: 0,Array,Chunk
Bytes,2.20 GiB,16.00 MiB
Shape,"(2, 11401, 25892)","(1, 2048, 2048)"
Dask graph,156 chunks in 1 graph layer,156 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.20 GiB,16.00 MiB
Shape,"(2, 11401, 25892)","(1, 2048, 2048)"
Dask graph,156 chunks in 1 graph layer,156 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.20 GiB 16.00 MiB Shape (2, 11401, 25892) (1, 2048, 2048) Dask graph 156 chunks in 1 graph layer Data type float32 numpy.ndarray",25892  11401  2,

Unnamed: 0,Array,Chunk
Bytes,2.20 GiB,16.00 MiB
Shape,"(2, 11401, 25892)","(1, 2048, 2048)"
Dask graph,156 chunks in 1 graph layer,156 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Calculate median composite
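The index and median below are first defined lazily; the final `.compute()` call then triggers the actual data loading and computation on the Dask cluster.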

In [5]:
# Apply the 0.0001 scale factor so values are normalised between 0 and 1.
# NOTE: this rebinds `ds`, so re-running this cell without first re-running
# the load cell would apply the scaling a second time.
ds = ds * 0.0001

# Mid-Infrared Burn Index (MIRBI):
#     MIRBI = 10 * (longer SWIR) - 9.8 * (shorter SWIR) + 2
# For DEA Sentinel-2 ARD, nbart_swir_3 is the longer-wavelength SWIR band and
# nbart_swir_2 the shorter one (verify against the DEA product description).
# Both terms previously used nbart_swir_2, which reduced the index to
# 0.2 * nbart_swir_2 + 2 and left nbart_swir_3 unused.
mirbi = 10.0 * ds.nbart_swir_3 - 9.8 * ds.nbart_swir_2 + 2.0

# Per-pixel median through time (still lazy at this point)
mirbi_median = mirbi.median(dim="time")

# Trigger the computation and bring the result into memory
mirbi_median = mirbi_median.compute()

  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)


## Export to file

In [6]:
# Write the computed median to a Cloud Optimised GeoTIFF in the working
# directory, replacing any existing file of the same name. The call is the
# last expression in the cell, so its return value is shown as the output.
cog_path = "mirbi_median_cog.tif"
mirbi_median.odc.write_cog(cog_path, overwrite=True)

PosixPath('mirbi_median_cog.tif')

In [7]:
# Build an interactive map view of the median composite and display it as the
# cell output
mirbi_map = mirbi_median.odc.explore()
mirbi_map