# Geomedian

## Notebook Summary

This notebook creates a geomedian product based on user-defined inputs.



## Import required modules

In [22]:
%matplotlib inline
import os
from datetime import datetime

from matplotlib import pyplot as plt
import numpy as np
import xarray as xr
import hdstats
import odc.algo

from datacube_utilities.dc_utilities import write_geotiff_from_xr

## Initialise

In [23]:
# Record which versions of the geomedian-related packages pip reports as installed.
!pip freeze | grep -E 'hdstats|odc-algo|odc-ui'

hdstats==0.1.3
odc-algo==0.1.dev439+gd29f1df
odc-ui==0.1.dev439+gd29f1df


In [24]:
import dask
from dask.distributed import Client

# Address of the Dask scheduler this notebook attaches to.
SCHEDULER_ADDRESS = 'dask-scheduler.dask.svc.cluster.local:8786'

# Attach to the existing scheduler; the bare name on the last line renders
# the client summary as the cell output.
client = Client(SCHEDULER_ADDRESS)
client


0,1
Client  Scheduler: tcp://dask-scheduler.dask.svc.cluster.local:8786  Dashboard: http://dask-scheduler.dask.svc.cluster.local:8787/status,Cluster  Workers: 7  Cores: 21  Memory: 140.00 GB


In [25]:
# Labels used to build the output file names: <site><band><Quarter>.tif
Quarter = '_Q4'    # suffix; keep in sync with the baseline date range chosen below
site = 'SI_AOI_'   # area-of-interest prefix

In [26]:
from datacube import Datacube
from odc.algo import to_f32, from_float, xr_geomedian

# Datacube handle used for the dc.load() query later in the notebook.
dc = Datacube()

# Name of the datacube product to load.
product = 's2_esa_sr_granule'

# ---------------------------------------------------------------------------
# Area of interest
# Uncomment exactly one x_extents / y_extents pair. The values are passed to
# dc.load() together with `crs` (set below), so switch `crs` to match the AOI.
# ---------------------------------------------------------------------------

# Sub-region selection - e.g. the city of Suva
#x_extents = (1951322.0, 1984588.0)
#y_extents = (3863701.0, 3883621.0)

# Ba River
#x_extents = (1866570.0, 1899660.0)
#y_extents = (3941720.0, 3967460.0)

# Tama River
#x_extents = (1838620.0, 1849340.0)
#y_extents = (3875020, 3887030)

# Vatukarasa
#x_extents =  (1873820, 1888200)
#y_extents =  (3865440,3872000)

# SuvaLami - extents given as (min, max)
#x_extents =  (1959630, 1975200)
#y_extents =  (3867890, 3881100)

# Kadavu - extents given as (min, max)
#x_extents =  (1911620, 1980520)
#y_extents =  (3756300, 3809900)

 
# Vanuatu
#x_extents =  (2019240, 2045040)
#y_extents =  (-2003150, -1980930)

# SI (currently selected)
x_extents = (1103000, 1122300)
y_extents = (-1052150, -1043630)

# CRS in which x_extents / y_extents are expressed.
# Fiji:
#crs = 'EPSG:3460'
# Vanuatu & SI:
crs = 'EPSG:3832'
#time_extents = '2019'

# ---------------------------------------------------------------------------
# Baseline period (start and end dates, 'YYYY-MM-DD')
# Uncomment exactly one quarter. The `Quarter` label used in the output file
# names is set separately and should be kept in sync with this choice.
# ---------------------------------------------------------------------------

# Q1
#baseline_start_date = '2019-01-1'
#baseline_end_date = '2019-03-31'

# Q2
#baseline_start_date = '2019-04-1'
#baseline_end_date = '2019-06-30'

# Q3
#baseline_start_date = '2019-07-1'
#baseline_end_date = '2019-09-30'

# Q4 (currently selected)
baseline_start_date = '2019-10-1'
baseline_end_date = '2019-12-31'


In [27]:
#time stuff
def createDate(inputStart, inputEnd):
    """Turn two 'YYYY-MM-DD' strings into a ``(start, end)`` tuple of dates.

    Parameters
    ----------
    inputStart : str
        First day of the period, in ``%Y-%m-%d`` form.
    inputEnd : str
        Last day of the period, in ``%Y-%m-%d`` form.

    Returns
    -------
    tuple of datetime.date
        ``(start_date, end_date)``, in the order the arguments were given.

    Raises
    ------
    ValueError
        If either string does not match ``%Y-%m-%d``.
    """
    date_format = '%Y-%m-%d'
    # Parse start first, then end, and keep only the calendar-date part.
    return tuple(
        datetime.strptime(text, date_format).date()
        for text in (inputStart, inputEnd)
    )

# (start, end) date tuple for the selected baseline period; passed as `time=` to dc.load().
baseline_time_period = createDate(baseline_start_date, baseline_end_date)

In [28]:
# Measurements to load: the bands that go into the geomedian, plus the
# classification band that is only used to build the quality mask.
# Alternative subsets: ['red', 'green', 'blue'] or ['nir', 'swir1', 'swir2'].
mask_bands = ['scene_classification']
data_bands = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2']

# Output grid requested from the datacube.
# NOTE(review): output_crs is EPSG:3460 while the query `crs` selected for this
# AOI is EPSG:3832 - confirm this is the intended output grid.
resolution = (-10, 10)
output_crs = 'EPSG:3460'

# Assemble the query once so every argument is visible in one place.
# Options tried previously and left disabled: time=time_extents,
# align=(15, 15), group_by='solar_day'.
load_query = dict(
    product=product,
    time=baseline_time_period,
    x=x_extents,
    y=y_extents,
    crs=crs,
    output_crs=output_crs,
    resolution=resolution,
    measurements=[*data_bands, *mask_bands],
    dask_chunks={'x': 1000, 'y': 1000},
)

xx = dc.load(**load_query)
xx

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 147.66 MB 2.00 MB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint16 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 147.66 MB 2.00 MB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint16 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 147.66 MB 2.00 MB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint16 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 147.66 MB 2.00 MB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint16 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 147.66 MB 2.00 MB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint16 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 147.66 MB 2.00 MB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint16 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,147.66 MB,2.00 MB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,73.83 MB,998.00 kB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint8,numpy.ndarray
"Array Chunk Bytes 73.83 MB 998.00 kB Shape (36, 998, 2055) (1, 998, 1000) Count 180 Tasks 108 Chunks Type uint8 numpy.ndarray",2055  998  36,

Unnamed: 0,Array,Chunk
Bytes,73.83 MB,998.00 kB
Shape,"(36, 998, 2055)","(1, 998, 1000)"
Count,180 Tasks,108 Chunks
Type,uint8,numpy.ndarray


In [29]:
# Scaling applied when converting to float32 (differs per product, aim for 0-1 values).
scale = 1/10_000
offset = 0

# Identify pixels with valid data (requires working with native resolution datasets).
# Scene-classification codes that are masked out; with these removed, the codes
# that remain are 4 (VEGETATION), 5 (NOT_VEGETATED), 6 (WATER) and 7 (UNCLASSIFIED).
masked_classes = (
    0,   # NO_DATA
    1,   # SATURATED_OR_DEFECTIVE
    2,   # DARK_AREA_PIXELS
    3,   # CLOUD_SHADOWS
    8,   # CLOUD_MEDIUM_PROBABILITY
    9,   # CLOUD_HIGH_PROBABILITY
    10,  # THIN_CIRRUS
    11,  # SNOW
)

# A pixel is good when its class differs from every masked code.
scl = xx.scene_classification
good_quality = scl != masked_classes[0]
for class_code in masked_classes[1:]:
    good_quality = good_quality & (scl != class_code)

# Keep only the good pixels of the data bands, then convert to scaled float32.
xx_data = xx[data_bands]
xx_clean = to_f32(
    odc.algo.keep_good_only(xx_data, where=good_quality),
    scale=scale,
    offset=offset,
)

geomedian_f32 = xr_geomedian(
    xx_clean,
    num_threads=1,    # disable internal threading, dask will run several concurrently
    eps=0.2*scale,    # 1/5 pixel value resolution
    nocheck=True,     # disable some checks inside geomedian library that use too much ram
)

# Convert back to int16 using the inverse of the scale/offset above; -9999 marks nodata.
yy = from_float(
    geomedian_f32,
    dtype='int16',
    nodata=-9999,
    scale=1/scale,
    offset=-offset/scale,
)
#yy2 = yy.get(['red'])
#print(yy2)

In [30]:
%%time
# Evaluate the lazily-defined geomedian and keep the computed result as `yy2`;
# %%time reports how long the computation took.
yy2 = yy.compute()
yy2

CPU times: user 1.81 s, sys: 213 ms, total: 2.03 s
Wall time: 2min 59s


In [31]:
#print(yy)

In [32]:
#from odc.ui import to_png_data
#from IPython.display import Image

#rgba = odc.algo.to_rgba(yy, clamp=3000)
#Image(data=to_png_data(rgba.data))

In [33]:
# Write every geomedian band to its own single-band GeoTIFF named
# qGeomedians/<site><band><Quarter>.tif.
#
# The rasters must be tagged with `output_crs`: that is the grid dc.load() was
# asked to produce. `crs` only describes the x/y extents of the query, and using
# it here mislabels the files whenever the two differ (EPSG:3832 vs EPSG:3460).
#
# Create the output folder first so a fresh run does not depend on it existing.
os.makedirs('qGeomedians', exist_ok=True)

for varname in yy2.data_vars:
    # Double brackets keep a one-variable Dataset rather than a bare DataArray.
    sinBand = yy2[[varname]]
    outputname = 'qGeomedians/' + site + varname + Quarter + '.tif'
    print(outputname)
    write_geotiff_from_xr(outputname, sinBand, crs=output_crs, x_coord='x', y_coord='y')
    

qGeomedians/SI_AOI_red_Q4.tif
qGeomedians/SI_AOI_green_Q4.tif
qGeomedians/SI_AOI_blue_Q4.tif
qGeomedians/SI_AOI_nir_Q4.tif
qGeomedians/SI_AOI_swir1_Q4.tif
qGeomedians/SI_AOI_swir2_Q4.tif


---

In [34]:
# NOTE(review): leftover debugging cell - it only prints the literal text 'broken'
# and nothing in the visible notebook depends on it; consider deleting this cell.
print('broken')

broken
