In [1]:
%matplotlib inline
import dask.array
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

In [2]:

import dask
from dask.distributed import Client
# host:port of the Dask scheduler this notebook connects to.
scheduler_address = 'dask-scheduler.dask.svc.cluster.local:8786'

# Connect to the scheduler; the trailing bare name displays the client summary
# (scheduler address, dashboard link, worker/core/memory totals).
client = Client(scheduler_address)
client

0,1
Client  Scheduler: tcp://dask-scheduler.dask.svc.cluster.local:8786  Dashboard: http://dask-scheduler.dask.svc.cluster.local:8787/status,Cluster  Workers: 5  Cores: 40  Memory: 168.65 GB


In [3]:
from datacube import Datacube
from datacube.storage.masking import mask_invalid_data

# Create the Datacube handle used by the load step below. It is constructed
# with no arguments, so it relies on whatever default configuration the
# environment provides (assumption — confirm against the deployment's setup).
dc = Datacube()

In [4]:
# --- What to load -----------------------------------------------------------
# Datacube product name and the measurement (band) names requested from it.
measurements = ['water']
product = 'ls8_water_classification'

# --- Output grid ------------------------------------------------------------
# Pixel size in units of the output CRS. The negative first component goes
# with y coordinates that decrease along the axis, as the loaded dataset shows.
resolution = (-30, 30)
# CRS the data is reprojected to on load.
# NOTE(review): EPSG:3460 is presumably the Fiji Map Grid — confirm.
output_crs = 'EPSG:3460'

In [5]:

# Named sub-regions available for analysis, each stored as
# ((lat_min, lat_max), (lon_min, lon_max)) in decimal degrees.
regions = {
    # Viti Levu
    'viti_levu': ((-18.2962, -17.2867), (177.1594, 178.7222)),
    # Viti Levu's eastern coast
    'viti_levu_eastern_coast': ((-18.0438, -17.5391), (178.3315, 178.7222)),
    # The city of Suva
    'suva': ((-18.2316, -18.0516), (178.2819, 178.6019)),
}

# Choose the sub-region to analyse by its key in `regions`.
latitude, longitude = regions['viti_levu']

# (start, end) dates bounding the query.
time_extents = ('2017-01-01', '2019-01-01')

In [6]:

# Spatial chunk sizes (in pixels) for the dask-backed load. No chunk size is
# given for 'time'; the loaded dataset reports one time step per chunk.
spatial_chunks = {'x': 1000, 'y': 1000}

# Lazily load the requested measurement for the chosen region and period,
# reprojected onto the configured output grid and grouped by solar day.
water_dataset = dc.load(
    product=product,
    measurements=measurements,
    time=time_extents,
    lat=latitude,
    lon=longitude,
    output_crs=output_crs,
    resolution=resolution,
    group_by='solar_day',
    dask_chunks=spatial_chunks,
)

# Display the dataset summary (dimensions, coordinates, chunking).
water_dataset

<xarray.Dataset>
Dimensions:  (time: 137, x: 5540, y: 3749)
Coordinates:
  * time     (time) datetime64[ns] 2017-01-07T22:06:49 ... 2018-12-28T22:06:30
  * y        (y) float64 3.968e+06 3.968e+06 3.968e+06 ... 3.856e+06 3.856e+06
  * x        (x) float64 1.831e+06 1.831e+06 1.831e+06 ... 1.997e+06 1.997e+06
Data variables:
    water    (time, y, x) int16 dask.array<shape=(137, 3749, 5540), chunksize=(1, 1000, 1000)>
Attributes:
    crs:      EPSG:3460

In [7]:
# Upper bound on the pixels touched by the query: time steps x rows x columns.
n_pixels = len(water_dataset.time) * len(water_dataset.y) * len(water_dataset.x)
print("Up to {:2.2f} billion pixels might be involved".format(n_pixels / 1e9))

Up to 2.85 billion pixels might be involved


In [8]:
# Boolean mask of valid pixels: True wherever the 'water' value differs from
# the -9999 "nodata" sentinel.
# (Author's note: this check requires working with native resolution datasets.)
good_quality = water_dataset['water'] != -9999

# Keep only the valid pixels; everything else is masked out of the dataset.
water_dataset = water_dataset.where(cond=good_quality)

In [9]:
# Per-pixel average of the masked 'water' variable along the time dimension.
# The input is dask-backed, so this only defines the computation.
mean_dataset = water_dataset['water'].mean('time')


In [None]:
%%time
# Execute the lazily defined time-mean and rebind mean_dataset to the computed
# result; the %%time cell magic reports how long this takes.
# NOTE(review): presumably the work runs on the cluster behind `client` — confirm.
mean_dataset = mean_dataset.compute()

In [None]:
# Drop the notebook's reference to the full masked time-series dataset now that
# only the time-mean is used. After this cell runs, any cell that reads
# water_dataset fails until the load step is re-run.
del water_dataset

In [None]:
# Plot the per-pixel time-mean of the water classification as an image.
#
# figsize is given in inches, so a modest width keeps the rendered image (and
# the saved notebook) at a manageable size; the height follows the array's
# aspect ratio so the map is not padded. Increase fig_width_in for more detail.
fig_width_in = 20
n_rows, n_cols = mean_dataset.shape
fig, ax = plt.subplots(figsize=(fig_width_in, fig_width_in * n_rows / n_cols))

image = ax.imshow(mean_dataset)

# A figure should stand alone when skimmed: title, axis labels and a colorbar.
ax.set_title('Mean of water classification over time')
ax.set_xlabel('x (pixel column)')
ax.set_ylabel('y (pixel row)')
# Trailing semicolon suppresses the text repr of the returned object.
fig.colorbar(image, ax=ax, label='Mean water value');

In [None]:
# Ask the connected Dask cluster, through the client, to restart.
# NOTE(review): presumably done to release worker memory once the result has
# been computed and plotted — confirm; if the scheduler is shared, check that
# restarting does not disrupt other users' work.
client.restart()