In [1]:
# install dependencies
import sys
!{sys.executable} -m pip install gdal shapely geopandas rasterio xarray \
    earthengine-api matplotlib contextily --upgrade >> /dev/null

In [2]:
import os
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely
from osgeo import gdal, ogr
import urllib
import shutil
import glob
import zipfile
import ee

In [3]:
from IPython.display import Image
from IPython.core.display import HTML
import contextily as ctx
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Pandas display settings: show at most 5 rows; `None` lifts the limit on the
# number of columns, the output width and the column text width.
for option_name, option_value in {
    'display.max_rows': 5,
    'display.max_columns': None,
    'display.width': None,
    'display.max_colwidth': None,
}.items():
    pd.set_option(option_name, option_value)

## Define helper functions

In [5]:
# create worldfile to define image coordinates
def worldfile_tofile(fname, area, dimensions):
    """Write a world file that georeferences the raster image `fname`.

    The world file is created next to the image and named after it, with the
    extension built from the first and last letter of the image extension
    plus 'w' (e.g. ``preview.jpg`` -> ``preview.jgw``).

    Parameters
    ----------
    fname : str
        Path of the image; it must have a file extension.
    area : sequence of 4 numbers
        Image extent as [xmin, ymin, xmax, ymax].
    dimensions : sequence of 2 ints
        Image size in pixels as (width, height).

    Raises
    ------
    ValueError
        If `fname` has no extension to derive the world file name from.
    """
    name, ext = os.path.splitext(fname)
    if len(ext) < 2:
        raise ValueError(f'cannot derive a world file name from {fname!r}: no file extension')
    # use QGIS worldfile names convention
    jext = ext[1] + ext[-1] + 'w'
    wname = name + os.extsep + jext

    # pixel size; yres is negative because image rows run from north to south
    xres = (area[2] - area[0]) / dimensions[0]
    yres = (area[1] - area[3]) / dimensions[1]
    # The last two world file lines are the coordinates of the CENTER of the
    # upper-left pixel, so shift the upper-left corner by half a pixel.
    coefficients = [xres, 0, 0, yres, area[0] + xres / 2, area[3] + yres / 2]

    # everything is computed before the file is opened, so a failure above
    # does not leave an empty world file behind
    with open(wname, 'w') as outfile:
        print('\n'.join(map(str, coefficients)), file=outfile)

# download GEE URL and save to file
def geeurl_tofile(GEEurl, fname):
    """Download the content behind `GEEurl` and store it in the file `fname`.

    Parameters
    ----------
    GEEurl : str
        URL to fetch (any scheme supported by ``urllib.request.urlopen``).
    fname : str
        Destination path; missing parent directories are created.
    """
    # a bare `import urllib` does not load the `request` submodule, so import
    # it explicitly instead of relying on another package having done so
    import urllib.request

    outdir = os.path.dirname(fname)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    # stream the response to disk without loading it into memory
    with urllib.request.urlopen(GEEurl) as response, open(fname, 'wb') as outfile:
        shutil.copyfileobj(response, outfile)

def gee_preview_tofile(GEEimage, vis, dimensions, fname=None, bounds=None):
    """Build a JPEG thumbnail URL for `GEEimage` and optionally save the thumbnail.

    Parameters
    ----------
    GEEimage : object
        Image providing ``visualize(**vis)`` whose result provides ``getThumbURL``.
    vis : dict
        Keyword arguments passed to ``visualize``.
    dimensions : sequence of 2 ints
        Thumbnail size in pixels as (width, height).
    fname : str, optional
        When given, the thumbnail is downloaded to this file and a world file
        is written next to it.
    bounds : sequence of 4 numbers, optional
        Extent [xmin, ymin, xmax, ymax] written to the world file. Defaults to
        the notebook-level `area`, which is what the function always used
        before this parameter existed.

    Returns
    -------
    dict
        ``{'url', 'width', 'height'}``, usable as ``IPython.display.Image(**result)``.
    """
    GEEurl = GEEimage\
        .visualize(**vis)\
        .getThumbURL({'dimensions': dimensions, 'format': 'jpg'})
    if fname is not None:
        if bounds is None:
            # backward-compatible fallback to the global work area
            bounds = area
        geeurl_tofile(GEEurl, fname)
        worldfile_tofile(fname, bounds, dimensions)
    return {'url': GEEurl, 'width': dimensions[0], 'height': dimensions[1]}

def split_rect(rect, n):
    """Split a rectangle into an n x n grid of equally sized cells.

    `rect` is [x0, y0, x1, y1]. The result is a list of n*n cells in the same
    [x0, y0, x1, y1] layout, ordered with the first axis varying slowest.
    """
    # n + 1 evenly spaced edges along each axis
    xs = np.linspace(rect[0], rect[2], n + 1)
    ys = np.linspace(rect[1], rect[3], n + 1)
    # pair consecutive edges on each axis to form the cell corners
    return [
        [x_lo, y_lo, x_hi, y_hi]
        for x_lo, x_hi in zip(xs[:-1], xs[1:])
        for y_lo, y_hi in zip(ys[:-1], ys[1:])
    ]

def zipsbands2image(files):
    """Merge zip archives of single-band rasters into one xarray dataset.

    Every archive becomes one dataset with one variable per archive member;
    the datasets of all archives are then merged.

    Parameters
    ----------
    files : iterable of str
        Paths of the zip archives; they are processed in sorted order.

    Returns
    -------
    The result of ``xr.merge`` over the per-archive datasets.
    """
    dss = []
    # merge separate file areas
    for fname in sorted(files):
        # Only the member names are needed from Python's side: the rasters are
        # opened through the '/vsizip/' path below. The context manager closes
        # the archive handle, which was previously leaked (and was bound to a
        # local named `zip`, shadowing the builtin).
        with zipfile.ZipFile(fname) as archive:
            bandnames = archive.namelist()
        # merge separate file to dataset
        ds = xr.Dataset()
        for bandname in bandnames:
            # the variable name is the second dot-separated part of the member name
            varname = bandname.split('.')[1]
            # NOTE(review): `xr.open_rasterio` is deprecated in recent xarray
            # releases - confirm it exists in the installed version.
            da = xr.open_rasterio(f'/vsizip/{fname}/{bandname}').squeeze(drop=True)
            ds[varname] = da
            da.close()
        dss.append(ds)
    return xr.merge(dss)

## Define GEE functions

In [6]:
# Function to mask clouds using the Sentinel-2 QA band.
def GEEmaskS2clouds(image):
    """Mask cloud and cirrus pixels of a Sentinel-2 image using its QA60 band.

    Returns the masked image divided by 10000, restricted to the bands whose
    names match "B.*", with the "system:time_start" property copied from the input.
    """
    # QA60 bit positions: 10 = clouds, 11 = cirrus
    cloud_bit, cirrus_bit = 10, 11

    qa60 = image.select('QA60')
    # a pixel is clear when the corresponding flag bit is zero
    no_cloud = qa60.bitwiseAnd(1 << cloud_bit).eq(0)
    no_cirrus = qa60.bitwiseAnd(1 << cirrus_bit).eq(0)

    masked = image.updateMask(no_cloud).updateMask(no_cirrus)
    # scale the data and drop the QA bands
    scaled = masked.divide(10000).select("B.*")
    return scaled.copyProperties(image, ["system:time_start"])

# Function to cloud mask from the pixel_qa band of Landsat 8 SR data.
def GEEmaskL8sr(image):
    """Mask cloud-shadow and cloud pixels of a Landsat 8 SR image via pixel_qa.

    Returns the masked image divided by 10000, restricted to the bands whose
    names match "B[0-9]*", with the "system:time_start" property copied from
    the input.
    """
    # pixel_qa bit positions: 3 = cloud shadow, 5 = cloud
    shadow_bit, cloud_bit = 3, 5

    pixel_qa = image.select('pixel_qa')
    # a pixel is clear when the corresponding flag bit is zero
    no_shadow = pixel_qa.bitwiseAnd(1 << shadow_bit).eq(0)
    no_cloud = pixel_qa.bitwiseAnd(1 << cloud_bit).eq(0)

    masked = image.updateMask(no_shadow).updateMask(no_cloud)
    # scale to reflectance and drop the QA bands
    scaled = masked.divide(10000).select("B[0-9]*")
    return scaled.copyProperties(image, ["system:time_start"])

## Connect to GEE

In [7]:
# GEE service account and the path of its JSON key file.
# Both can be overridden through environment variables so the notebook does not
# depend on one user's home directory; the former hard-coded values remain the
# defaults. GOOGLE_APPLICATION_CREDENTIALS is the variable this notebook also
# sets for GDAL.
service_account = os.environ.get(
    'GEE_SERVICE_ACCOUNT', 'console@gee-export-308512.iam.gserviceaccount.com')
service_key = os.environ.get(
    'GOOGLE_APPLICATION_CREDENTIALS', '/Users/mbg/gee-export.json')

In [8]:
# for GDAL: expose the service key path through the process environment
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=service_key)

In [9]:
# authenticate against Earth Engine with the service account key file
credentials = ee.ServiceAccountCredentials(email=service_account, key_file=service_key)
ee.Initialize(credentials=credentials)

## Define work area and scale

In [10]:
# work area, decimal degrees
# ordered as [xmin, ymin, xmax, ymax] (x = longitude, y = latitude)
area = [116.789084,  -9.031312, 117.291524,  -8.632405]
# processing scale, m
scale = 30

# Earth Engine rectangle built from the four values above; the trailing
# expression shows its description as the cell output
GEEarea = ee.Geometry.Rectangle(*area)
GEEarea.getInfo()

{'type': 'Polygon',
 'coordinates': [[[116.789084, -9.031312],
   [117.291524, -9.031312],
   [117.291524, -8.632405],
   [116.789084, -8.632405],
   [116.789084, -9.031312]]]}

## Prepare image bands

In [11]:
# Auxiliary single-band layers; each comment links to the dataset description.

# https://developers.google.com/earth-engine/datasets/catalog/CSP_ERGo_1_0_Global_ALOS_topoDiversity
GEEdiversity = (
    ee.Image("CSP/ERGo/1_0/Global/ALOS_topoDiversity")
    .rename('topoDiversity')
)
# https://developers.google.com/earth-engine/datasets/catalog/CSP_ERGo_1_0_Global_ALOS_mTPI
GEEposition = (
    ee.Image("CSP/ERGo/1_0/Global/ALOS_mTPI")
    .rename('mTPI')
)
# https://developers.google.com/earth-engine/datasets/catalog/JAXA_ALOS_PALSAR_YEARLY_FNF
# 1 - Forest, 2 - Non-Forest, 3 - Water
GEEfnf = (
    ee.ImageCollection("JAXA/ALOS/PALSAR/YEARLY/FNF")
    .mosaic()
    .select('fnf')
    .rename('FNF')
)
# https://developers.google.com/earth-engine/datasets/catalog/JAXA_ALOS_AW3D30_V3_2
GEEdsm = (
    ee.ImageCollection("JAXA/ALOS/AW3D30/V3_2")
    .mosaic()
    .select('DSM')
)

### Sentinel-2 composite image

In [12]:
# https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR
# Filter by location and scene cloudiness first and only then map the per-image
# cloud mask, so the mask is requested only for scenes that are actually used
# (previously the mask was mapped before the spatial filter).
GEEs2srcol = (
    ee.ImageCollection('COPERNICUS/S2_SR')
    .filterBounds(GEEarea)
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
    .map(GEEmaskS2clouds)
)
# per-band median over the collection; every band name gets the 'S2SR' prefix
GEEs2sr = GEEs2srcol.median().rename(
    GEEs2srcol.first().bandNames().map(lambda name: ee.String('S2SR').cat(name))
)
print ('Collected Sentinel-2 SR images', GEEs2srcol.size().getInfo())
print ('Collected Sentinel-2 SR bands', GEEs2sr.bandNames().getInfo())

Collected Sentinel-2 SR images 177
Collected Sentinel-2 SR bands ['S2SRB1', 'S2SRB2', 'S2SRB3', 'S2SRB4', 'S2SRB5', 'S2SRB6', 'S2SRB7', 'S2SRB8', 'S2SRB8A', 'S2SRB9', 'S2SRB11', 'S2SRB12']


### Landsat 8 composite image

In [13]:
# Raw Landsat 8 Collection 1 Tier 1 scenes intersecting the work area
# (simpleComposite takes raw scenes as its input collection).
# https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C01_T1
GEElc8toacol = ee.ImageCollection('LANDSAT/LC08/C01/T1').filterBounds(GEEarea)
# https://developers.google.com/earth-engine/apidocs/ee-algorithms-landsat-simplecomposite
GEElc8toacomposite = ee.Algorithms.Landsat.simpleComposite(
    collection=GEElc8toacol, cloudScoreRange=20, maxDepth=40, asFloat=True)
# every band name gets the 'LC8TOA' prefix
GEElc8toa = GEElc8toacomposite.rename(
    GEElc8toacomposite.bandNames().map(lambda name: ee.String('LC8TOA').cat(name))
)
print ('Collected Landsat-8 TOA images', GEElc8toacol.size().getInfo())
print ('Collected Landsat-8 TOA bands', GEElc8toa.bandNames().getInfo())

Collected Landsat-8 TOA images 239
Collected Landsat-8 TOA bands ['LC8TOAB1', 'LC8TOAB2', 'LC8TOAB3', 'LC8TOAB4', 'LC8TOAB5', 'LC8TOAB6', 'LC8TOAB7', 'LC8TOAB8', 'LC8TOAB9', 'LC8TOAB10', 'LC8TOAB11']


## Build image mosaic

In [14]:
# stack all prepared layers into one multi-band image limited to the work area
GEEimage = (
    GEEs2sr
    .addBands(GEElc8toa)
    .addBands(GEEdiversity)
    .addBands(GEEposition)
    .addBands(GEEfnf)
    .addBands(GEEdsm)
    .clip(GEEarea)
)
print ('collected bands', GEEimage.bandNames().getInfo())

collected bands ['S2SRB1', 'S2SRB2', 'S2SRB3', 'S2SRB4', 'S2SRB5', 'S2SRB6', 'S2SRB7', 'S2SRB8', 'S2SRB8A', 'S2SRB9', 'S2SRB11', 'S2SRB12', 'LC8TOAB1', 'LC8TOAB2', 'LC8TOAB3', 'LC8TOAB4', 'LC8TOAB5', 'LC8TOAB6', 'LC8TOAB7', 'LC8TOAB8', 'LC8TOAB9', 'LC8TOAB10', 'LC8TOAB11', 'topoDiversity', 'mTPI', 'FNF', 'DSM']


### Make image preview and save on local filesystem
Note: check the quality of the image preview before using the image.

In [15]:
# define GEE Landsat 8 visualization parameters:
# bands B4, B3, B2 of the Landsat 8 composite, stretched between 0.03 and 0.18
# with gamma 1.4
LC8vis = {'bands':['LC8TOAB4', 'LC8TOAB3', 'LC8TOAB2'], 'min':0.03, 'max':0.18, 'gamma':1.4}

In [16]:
# small, fast, low-resolution preview; no file name is passed, so nothing is saved
preview_small = gee_preview_tofile(GEEimage, LC8vis, (160, 160))
Image(url=preview_small['url'], width=preview_small['width'], height=preview_small['height'])

In [17]:
%%time
print (gee_preview_tofile(GEEimage, LC8vis, (800, 800), 'data/preview.800x800.jpg')['url'])

https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/1fa72922ac66f72165e368485bd77412-9ab0f75ddcaf046943d8a5b57dd4c65d:getPixels
CPU times: user 51.7 ms, sys: 14.3 ms, total: 66 ms
Wall time: 2min 16s


In [18]:
%%time
print (gee_preview_tofile(GEEimage, LC8vis, (1600, 1600), 'data/preview.1600x1600.jpg')['url'])

https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/1dadef2a9cb0cf6ae6d3171cd36d39bb-6d74250258c0499d5ec4f1b095bb13d7:getPixels
CPU times: user 77.9 ms, sys: 37.1 ms, total: 115 ms
Wall time: 4min 14s


### Extract raw image bands and save on local filesystem

In [None]:
# split the area into n*n cells to stay within the download limits
n = 3
cells = split_rect(area, n)
for idx, cell in enumerate(cells):
    # one download request per cell, one file per band inside the archive
    region = ee.Geometry.Rectangle(*cell)
    params = {'filePerBand':True, 'scale': scale, 'region': region}
    GEEurl = GEEimage.getDownloadURL(params)
    print (f'downloading cell {idx+1} from {n*n}', GEEurl)
    fname = f'data/image_{idx}.{scale}m.zip'
    geeurl_tofile(GEEurl, fname)

### Convert downloaded zipped image chunks to a compressed NetCDF file

In [None]:
%%time
# merge all downloaded per-cell archives of the current scale into one dataset
ds = zipsbands2image(glob.glob(f'data/image_*.{scale}m.zip'))
# fix some variables data
# NOTE(review): both fixes write through `.values` in place and store NaN, so
# they assume the variables hold loaded floating-point data - TODO confirm.
# replace the mTPI nodata marker (taken from its `nodatavals` attribute) with NaN
ds.mTPI.values[ds.mTPI==ds.mTPI.nodatavals] = np.nan
# replace -inf values of topoDiversity with NaN
ds.topoDiversity.values[ds.topoDiversity == -np.inf] = np.nan

In [None]:
# check data variables: print the minimum and maximum of every variable
for varname in ds.data_vars:
    band = ds[varname]
    lowest, highest = float(band.min()), float(band.max())
    print (varname, lowest, highest)

In [None]:
# write the dataset to NetCDF with zlib compression (level 6) on every variable
compression = {'zlib': True, 'complevel': 6}
encoding = {var: dict(compression) for var in ds.data_vars}
ds.to_netcdf(f'data/image.{scale}m.nc', encoding=encoding)
print (ds.dims)