# World Water Toolbox

This notebook is an example of running the World Water Toolbox processing chain on the openEO Platform.
The processing chain is divided into three main sub-flows.

![flow](images/flow.PNG)

In [15]:
# import libraries
import numpy as np
import pathlib

import openeo
from openeo.extra.spectral_indices.spectral_indices import append_index
# exp and log are the openEO process builders used inside the reducer callbacks below.
from openeo.processes import array_element, exp, log, normalized_difference

# NOTE: order kept as-is; the star imports below may shadow or be shadowed by neighbouring names.
from eo_utils import *
from datetime import datetime
import dask.array as da
import xarray as xr
import scipy.signal
from dateutil.relativedelta import *

In [6]:
# Open a session on the openEO Platform aggregator and log in through OpenID Connect.
# authenticate_oidc() hands back the connection it was called on, so both steps chain.
connection = openeo.connect("openeo.cloud").authenticate_oidc()

# Show the account details of the authenticated user as the cell output.
connection.describe_account()

Authenticated using refresh token.


{'default_plan': 'early-adopter',
 'info': {'default_plan': 'early-adopter',
  'oidc_userinfo': {'acr': 'https://refeds.org/assurance/IAP/low',
   'eduperson_assurance': ['https://aai.egi.eu/LoA#Low',
    'https://refeds.org/assurance/IAP/low'],
   'eduperson_entitlement': ['urn:mace:egi.eu:group:vo.openeo.cloud:role=vm_operator#aai.egi.eu',
    'urn:mace:egi.eu:group:vo.openeo.cloud:role=member#aai.egi.eu',
    'urn:mace:egi.eu:group:vo.openeo.cloud:role=early_adopter#aai.egi.eu'],
   'email': 'sulova.andrea@gmail.com',
   'email_verified': True,
   'sub': '1edbae7adc053e5164b8ac7696e17a9ec031bf5a11fe6dce659cafe39a9366a2@egi.eu',
   'voperson_verified_email': ['sulova.andrea@gmail.com']},
  'roles': ['EarlyAdopter']},
 'name': 'sulova.andrea@gmail.com',
 'user_id': '1edbae7adc053e5164b8ac7696e17a9ec031bf5a11fe6dce659cafe39a9366a2@egi.eu'}

# Input parameters


- Specify the Area of Interest

In [8]:
# Colombia: centre of the interactive map as [latitude, longitude].
center = [4.707, -73.987]
zoom = 14
# openMap comes from the eo_utils star import; the drawn/visible extent is read back
# in the next cell through eoMap.getBbox().
eoMap = openMap(center,zoom)
# Last expression of the cell: renders the map widget.
eoMap.map

Map(center=[4.707, -73.987], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom…

In [9]:
# Read the bounding box from the map widget; it is indexed below as
# [west, south, east, north].
bbox = eoMap.getBbox()

# openEO-style spatial extent in EPSG:4326.
spatial_extent = dict(
    west=bbox[0],
    east=bbox[2],
    south=bbox[1],
    north=bbox[3],
    crs=4326,
)

# Echo the four edges for a quick visual check.
print('west',bbox[0],'\neast',bbox[2],'\nsouth',bbox[1],'\nnorth',bbox[3])

west -74.0651035308838 
east -73.90897750854494 
south 4.689864510216501 
north 4.724080996325038


# 1 ) *Sentinel-2 ARD Pre-Processing*


* Specify collections :
  
  * **SENTINEL2_L1C** (EODC) - Missing Zenith Azimuth, support FORCE and Fmask

  * **SENTINEL2_L1C_SENTINELHUB** (TerraScope backend) - supports SMAC and iCor atmospheric correction

  * **boa_sentinel_2** (EODC) -  (ARD)  processed with FORCE. Missing Zenith and Azimuth information, cloud mask needs to be applied


  * **SENTINEL2_L2A_SENTINELHUB** (TerraScope backend) - includes Zenith and Azimuth information, (ARD) processed with Sen2Cor; cloud mask needs to be applied

  Read more about ARD https://docs.openeo.cloud/usecases/ard/msi/#reference-implementations



![S2](images/S2_v1.PNG)

* Specify temporal extent and bands:

In [None]:

# Month of interest. Every other date is derived from this single value so the
# windows cannot drift apart (previously the exclusion start was computed from a
# stale 2021 date while start/end were overwritten with 2022 values, which
# turned the intended 3-month window into a ~15-month one).
start_date_dt_object = datetime.strptime('2022-01-01', '%Y-%m-%d')

# ISO strings are used for all temporal extents.
start_date = start_date_dt_object.date().isoformat()
# One month after the start (2022-02-01). openEO treats the end of a temporal
# extent as exclusive, so this covers the whole month of interest.
end_date = (start_date_dt_object + relativedelta(months = +1)).date().isoformat()
# Two months before the start, giving a 3-month window for the exclusion layer.
start_date_exclusion = (start_date_dt_object + relativedelta(months = -2)).date().isoformat()

# Full band list of interest (kept for reference; only a subset is loaded below).
bands           = ['B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09',  'B11', 'B12', 'CLP', 'SCL' , 'sunAzimuthAngles', 'sunZenithAngles']
spatial_extent  = {'west': -74.06810760, 'east': -73.90597343, 'south': 4.689864510, 'north': 4.724080996, 'crs': 'epsg:4326'}  #colombia
# spatial_extent  = {'west':bbox[0],'east':bbox[2],'south':bbox[1],'north':bbox[3]}

## Get the Sentinel-2 data for a 3 month window.
# NOTE: the order of the loaded bands matters; later cells address bands by index.
s2_cube = connection.load_collection(
    'SENTINEL2_L2A_SENTINELHUB',
    spatial_extent = spatial_extent,
    temporal_extent = [start_date_exclusion, end_date],
    bands = ['B02', 'B03', 'B04', 'B08', 'CLP', 'SCL', 'sunAzimuthAngles']
)


A) *Mask Clouds, Shadows and Snow*
- atmospheric_correction: https://docs.openeo.cloud/usecases/ard/msi/#reference-implementations

In [21]:
# Scene classification data, based on Sen2Cor process
# scl == 3    Cloud Shadows
# scl == 8    Clouds medium probability
# scl == 9    Clouds high probability
# scl == 10   Cirrus
# scl == 11   Snow / Ice

# Step 1: mask every pixel whose SCL class is cloud shadow, cloud, cirrus or snow/ice.
scl = s2_cube.band("SCL")
mask = (scl == 3) | (scl == 8) | (scl == 9) | (scl == 10) |(scl == 11)
S2_cube_scl = s2_cube.mask(mask)

# Step 2: CLP (cloud probabilities) based on s2cloudless.
# Read CLP from the loaded cube `s2_cube`; `S2_cube` is only created at the end
# of this cell, so referencing it here failed on a fresh kernel.
clp = s2_cube.band("CLP")
clp = clp.resample_spatial(resolution=20, method = "bicubic")
# CLP is divided by 255 to get a 0-1 probability; pixels above 0.3 are masked.
# (The original note says s2cloudless works at 160 m resolution.)
mask = (clp / 255) > 0.3
S2_cube = S2_cube_scl.mask(mask)


- Examples of Cloud Masking

a) S2

![cloudMask](images/CloudMask_RGB.PNG)

b) S2 CLP

![cloudMask](images/CloudMask_CLP.PNG)

c) S2 SCL

![cloudMask](images/CloudMask_SCL.PNG)

d) S2 CLP - SCL

![cloudMask](images/CloudMask_SCL_CLP.PNG)

B) NDVI Calculation 
-   NDVI (Sentinel 2) = (B8 – B4) / (B8 + B4)


In [22]:
# Append an NDVI band to the cloud-masked cube.
# NOTE(review): S2_cube is reassigned from itself, so re-running this cell calls
# append_index again on the already extended cube.
S2_cube = append_index(S2_cube,"NDVI")

C) NDWI Calculation 

-   NDWI (Sentinel 2) = (B3 – B8) / (B3 + B8)

In [23]:
# Append an NDWI band to the cube (after the NDVI band added in the previous cell).
# NOTE(review): S2_cube is reassigned from itself, so re-running this cell calls
# append_index again on the already extended cube.
S2_cube = append_index(S2_cube,"NDWI")

D) Mask Terrain Shadow

In [None]:
# Load the Copernicus 30 m DEM for the area of interest. The temporal extent is
# deliberately wide (2010-2030) - presumably so the static DEM tiles always fall
# inside it; verify against the collection metadata.
dem_cube = connection.load_collection(
    "COPERNICUS_30",
    spatial_extent = spatial_extent,
    temporal_extent=["2010-01-01", "2030-12-31"])

# Collapse the time dimension so a single elevation layer remains.
dem_cube = dem_cube.max_time()

# Synchronous download of the raw DEM, then load it locally for inspection.
dem_cube.download("poc-copernicus30-raw.nc")
dem_ds = xr.load_dataset("poc-copernicus30-raw.nc")  # closing parenthesis was missing

In [25]:
# Source of the UDF executed on the backend. It is self-contained code, so it
# must import everything it uses itself: numpy was referenced as `np` without
# being imported, which raises NameError when the UDF runs.
# The hillshade arguments (resolution, zenith, ray, ystart, yend) are hard-coded;
# NOTE(review): confirm their meaning against the backend's `hillshade` package.
udf_code = """

import numpy as np
from openeo.udf import XarrayDataCube
from hillshade.hillshade import hillshade

def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
    xarr = cube.get_array()
    if "t" in xarr.coords:
        xarr = xarr[{"t": 0}]
    if "bands" in xarr.coords:
        xarr = xarr[{"bands": 0}]
    
    elevation = xarr.values
    elevation = np.nan_to_num(elevation).clip(0, 8000)
    elevation = elevation.astype("float32")
    
    resolution = (30.0, 30.0)
    zenith = 15.0
    ray = (0.50, 1.0)
    ystart = 0
    yend = elevation.shape[1]
    
    shadow = hillshade(elevation, resolution, zenith, ray, ystart, yend)
    
    cube.get_array().values[0] = shadow
    return cube
"""

# Wrap the source as a UDF process and apply it to the DEM cube.
process = openeo.UDF(code=udf_code, runtime="Python", data={"from_parameter": "x"})
hillshaded = dem_cube.apply(process=process)

# Synchronous download of the terrain-shadow result.
hillshaded.download("poc-hillshaded.nc")


In [None]:
# Load the downloaded hillshade result; xarray is imported as `xr` in this
# notebook, so the bare name `xarray` was undefined.
hillshaded_ds = xr.load_dataset("poc-hillshaded.nc")

# Plot the "DEM" variable of the result file.
hillshaded_ds["DEM"].plot(size = 10)

![cloudMask](images/Terrain_shadow.PNG)


Get an image count (using B08) of all the non-masked areas.

In [None]:
## Per-pixel count of valid observations after cloud masking (B08 is used as
## the reference band). The masked cube `S2_cube` is used: the raw `s2_cube`
## is neither cloud-masked nor extended with the NDVI/NDWI bands.
s2_count = S2_cube.filter_bands(bands = ["B08"]).reduce_dimension(reducer = "count", dimension = "t")

def water_function(data):
    """Logistic water probability from NDVI and NDWI for one pixel/time step.

    `data` is the array of band values of `S2_cube`. Bands are addressed by
    position: seven bands are loaded from the collection (indices 0-6), then
    NDVI is appended (index 7) and NDWI after it (index 8).

    Returns the probability 1 / (1 + exp(-(0.845 + 2.14 * NDVI + 13.5 * NDWI))).
    """
    ndvi = array_element(data, index = 7)
    ndwi = array_element(data, index = 8)
    water = 1 / (1 + exp(- (0.845 + (2.14 * ndvi) + (13.5 * ndwi))))
    return water

## Of the non-masked data, calculate water probability for each image.
s2_cube_water = S2_cube.reduce_dimension(reducer = water_function, dimension = "bands")

## apply a threshold of 0.75
s2_cube_water_threshold = s2_cube_water.apply(lambda x: x > 0.75)


## Count the number of Sentinel-2 Water observations.
cube_sum = s2_cube_water_threshold.reduce_dimension(reducer = "sum", dimension = "t")

## Calculate surface water frequency. Water observations / Total image count.
cube_swf = cube_sum / s2_count
  

E) *Create Monthly Best-Pixel Mosaic*

In [26]:
## Calculate the median S2 water probability for the 1-month window.
s2_median_water = s2_cube_water.filter_temporal([start_date, end_date]).median_time()

## Median band composite for the same month. It is built from `S2_cube` (cloud-masked,
## with NDVI/NDWI appended) rather than the raw `s2_cube`, because the NDWI band of
## this composite is selected later when merging with Sentinel-1.
s2_cube_median = S2_cube.filter_temporal([start_date, end_date]).median_time()

Check & download results: https://editor.openeo.org/

In [None]:
# Send cube to server as a batch job.
# The save_result node gets its own name: `s2_cube_median` is reused later in
# the notebook (filter_bands / merge_cubes) and must stay a plain data cube.
s2_cube_median_result = s2_cube_median.save_result(format='GTiff') #GTiff #netCDF
my_job  = s2_cube_median_result.send_job(title="S2_L2A")

# Block until the job finishes, then download all result assets into ./s2_cube_median
results = my_job.start_and_wait().get_results()
results.download_files("s2_cube_median")

In [None]:
# !gdalinfo -hist output/openEO.nc
# Open the downloaded Sentinel-2 median composite for local inspection.
# NOTE(review): 's2_cube_median' is the target passed to results.download_files(...)
# in the previous cell and the job saved GTiff - confirm that this path points to a
# single file xarray can open.
S2_L2A = xr.open_dataset('s2_cube_median')

# 2) *Sentinel-1 ARD Pre-Processing*

Load Collection of Sentinel-1 

![Cat](images/S1_v1.PNG)

 Load Collection

In [None]:
## Load one month of Sentinel-1 GRD scenes (VH and VV); the ARD conversion is applied afterwards.
s1_load_kwargs = dict(
    spatial_extent = spatial_extent,
    temporal_extent = [start_date, end_date],
    bands = ['VH','VV'],
    # Keep only products whose "polarization" property equals "DV"
    # (presumably dual VV+VH acquisitions - verify against the collection metadata).
    properties = {"polarization": lambda p: p == "DV"},
)
s1_cube = connection.load_collection('SENTINEL1_GRD', **s1_load_kwargs)


A)  Analysis-Ready-Data 

In [25]:
# Convert the loaded GRD cube to analysis-ready normalized radar backscatter.
# The result gets its own name so that re-running this cell does not apply the
# ARD process a second time on an already converted cube.
s1_cube_ard = s1_cube.ard_normalized_radar_backscatter()


def log_(x):
    """Convert a linear backscatter value to decibels: 10 * log10(x)."""
    return 10 * log(x, 10)


# Temporal median of the backscatter, then conversion to dB.
s1_median = s1_cube_ard.median_time().apply(log_)

*C) Mask Sentinel-1 Exclusion Layer*

In [None]:
def s1_water_function(data):
    """Logistic water probability from Sentinel-1 VV backscatter.

    `data` is the band array of the S1 median cube; index 1 is read as VV
    (bands are loaded as ['VH', 'VV']).
    """
    vv = array_element(data, index = 1)
    linear_term = -7.17 + (-0.48 * vv)
    return 1 / (1 + exp(- linear_term))


# Water probability of the monthly S1 median, before any masking.
s1_water_unmasked = s1_median.reduce_dimension(reducer = s1_water_function, dimension = "bands")

# Exclusion layer: S1 reports water (> 0.5) while the S2 surface water frequency is low (< 0.33).
exclusion_mask = (s1_water_unmasked > 0.5) & (cube_swf < 0.33)

# Remove the excluded pixels from the S1 water probability.
s1_median_water = s1_water_unmasked.mask(exclusion_mask)


Check & download results: https://editor.openeo.org/

In [None]:
# Export the masked S1 water probability as a batch job.
# The save_result node gets its own name: `s1_median_water` is reused later in
# the notebook (mask / merge) and must stay a plain data cube.
s1_median_water_result = s1_median_water.save_result(format='GTiff') #GTiff #netCDF
my_job  = s1_median_water_result.send_job(title="S1_ARD")

# Block until the job finishes, then download all result assets into ./s1_median_water
results = my_job.start_and_wait().get_results()

results.download_files('s1_median_water')

# 3) Water Masking

![Cat](images/WM_v1.png)

- Load LUT and run logistic expressions


In [60]:
def s1_s2_water_function (dc):
    """Logistic water probability combining Sentinel-1 VV and Sentinel-2 NDWI.

    `dc` is the band array of the merged cube; index 0 is read as VV and
    index 1 as NDWI.
    """
    vv = array_element(dc, index = 0)
    ndwi = array_element(dc, index = 1)
    linear_term = -2.64 + (-0.23 * vv) + (8.6 * ndwi)
    return 1 / (1 + exp(- linear_term))


## Get S1 VV, and S2 NDWI, and apply s1_s2_water function
# The merged cube carries two bands, VV first and NDWI second - the order
# s1_s2_water_function reads them in (index 0 and 1).
s1_s2_cube = s1_median.filter_bands(['VV']).merge_cubes(s2_cube_median.filter_bands(['NDWI']))
s1_s2_cube.download('s1_s2_cube.tif') ## synchronous debug download; taking more than 15 mins.
# Combined S1+S2 water probability, with the exclusion-layer pixels masked out.
s1_s2_water = s1_s2_cube.reduce_dimension(reducer = s1_s2_water_function, dimension = "bands").mask(exclusion_mask)
s1_s2_water.download('s1_s2_water.tif')


## create the masks so that the data is independent of each other.
# Each mask flags where a source is used (value >= 0), in priority order:
# combined S1+S2 first, then S2 only where the combined mask is not set, then
# S1 only where neither of the previous two is set.
# NOTE(review): this relies on masked pixels becoming no-data so that `>= 0`
# does not evaluate to true for them - confirm on the backend.
s1_s2_mask = s1_s2_water >= 0
s2_mask = s2_median_water.mask(s1_s2_mask) >= 0
s1_mask = s1_median_water.mask(s1_s2_mask).mask(s2_mask) >= 0
# nodata_mask


## download and check the masks
s1_s2_mask.download('s1_s2_mask.tif')
s2_mask.download('s2_mask.tif')
s1_mask.download('s1_mask.tif')


## masks need to be inverted.
# `mask` replaces pixels where the mask is true, so `x.eq(0)` flips each "valid"
# mask into a "replace" mask; replaced pixels are set to 0 so the sum-based
# merge below can add the three layers.
s1_s2_masked = s1_s2_water.mask(s1_s2_mask.apply(lambda x: x.eq(0)), replacement = 0)
s2_masked = s2_median_water.mask(s2_mask.apply(lambda x: x.eq(0)), replacement = 0)
s1_masked = s1_median_water.mask(s1_mask.apply(lambda x: x.eq(0)), replacement = 0)


## download and check probabilities are correct after masking.
s1_s2_masked.download('s1_s2_masked-v2.tif')
s2_masked.download('s2_masked-v2.tif')
s1_masked.download('s1_masked-v2.tif')


## merge the masked probabilities using "sum"
# Overlapping pixels of the merged cubes are resolved by adding their values.
merge1 = s1_s2_masked.merge_cubes(s2_masked, overlap_resolver='sum')
merge2 = merge1.merge_cubes(s1_masked, overlap_resolver='sum')
merge2.download('composite-probability.tif')


*B) Mask Urban Areas*

In [None]:
# ESA WorldCover 2020 land-cover map. The product is dated around 2020, so the
# extent of the month of interest (start_date/end_date) would not match it;
# a fixed window spanning 2020-2021 is used instead.
worldcover_cube = connection.load_collection("ESA_WORLDCOVER_10M_2020_V1",
                                            temporal_extent = ["2020-01-01", "2021-12-31"],
                                            spatial_extent = spatial_extent,
                                            bands = ["MAP"])
                                            #.filter_bbox({'west':bbox[0],'east':bbox[2],'south':bbox[1],'north':bbox[3]})

# WorldCover class 50 is "Built-up". `mask` replaces the pixels where the mask
# is TRUE, so the mask must flag the built-up pixels themselves (== 50);
# `!= 50` removed everything except the urban areas.
builtup_mask = worldcover_cube.band("MAP") == 50

# Composite water probability with urban areas removed.
composite = merge2.mask(builtup_mask)

*C) Threshold water*

In [None]:
# Binary water map: true where the composite water probability exceeds 0.75.
threshold_water = composite > 0.75

# bitmask