In [1]:
import boto3
import sys
import numpy as np
import time
import os
import subprocess
import xarray as xr
import rioxarray
import dask.array as da
from os import path
import pandas as pd
from datacube.utils.dask import start_local_dask
from pathlib import Path
from datacube.utils.cog import write_cog

In [2]:
def categorize_damage(bc_canopy, ac_canopy):
    """
    The function to categorize the change between two canopy class rasters.

    The comparison is built lazily on the underlying arrays (``.data``), so
    dask-backed inputs are only read when the caller computes the result,
    instead of being materialised once per rule.

    inputs:
        bc_canopy: the canopy class of previous year; its attrs must carry
                   the nodata value under '_FillValue'
        ac_canopy: the canopy class of current year
    output:
        an xarray of canopy change: a copy of bc_canopy whose data is a
        uint8 dask array of change codes, with '_FillValue' removed from
        the attrs and attrs['nodata'] set to 0
    raises:
        ValueError: if bc_canopy has no '_FillValue' attribute
    """
    fill_value = bc_canopy.attrs.get('_FillValue')
    if fill_value is None:
        raise ValueError("bc_canopy.attrs must define '_FillValue' (the nodata value)")
    nodata = int(fill_value)

    # .data keeps dask-backed rasters lazy; .values would load them every time
    prev = bc_canopy.data
    curr = ac_canopy.data

    # (condition, code) pairs applied in order: a later rule overrides an
    # earlier one wherever both conditions hold, so the order matters.
    rules = (
        ((prev == 0) | (curr == 0), 101),
        (prev == curr, 101),
        ((prev >= 2) & (curr == nodata), 105),
        ((prev == 1) & (curr == nodata), 104),
        ((prev == 3) & (curr == 1), 103),
        ((prev == 3) & (curr == 2), 102),
        ((prev == 2) & (curr == 1), 102),
        ((prev == nodata) & (curr >= 1), 201),
        ((prev == 1) & (curr == 2), 202),
        ((prev == 2) & (curr == 3), 202),
        ((prev == 1) & (curr == 3), 203),
        # nodata in both years goes back to 0 (the output nodata); this also
        # undoes the earlier rules that match nodata through ==, >= 1 or >= 2
        ((prev == nodata) & (curr == nodata), 0),
    )

    change = da.zeros(prev.shape, dtype='uint8')
    for condition, code in rules:
        # np.uint8 keeps the result uint8 whatever the scalar promotion rules
        change = da.where(condition, np.uint8(code), change)

    # copy() duplicates coords and attrs, so the input's attrs stay untouched
    reduction = bc_canopy.copy(deep=True, data=change)
    reduction.attrs.pop('_FillValue')
    reduction.attrs['nodata'] = 0
    return reduction

In [3]:
# bucket and prefix is where the C3 mangroves sit currently
bucket = "dea-public-data-dev"
prefix = "derivative/ga_ls_mangrove_cover_cyear_3/2-0-2/"
# profile_name is the aws credential name, change it accordingly
session = boto3.Session(profile_name='dev')
s3_client = session.client("s3")
# list_objects_v2 is the listing operation AWS recommends over the legacy
# list_objects; it takes the same Bucket/Prefix arguments and also returns
# the object keys under Contents[].Key
paginator = s3_client.get_paginator('list_objects_v2')

In [4]:
# Start a local dask cluster for the processing below; adjust the thread
# count and the memory limit to the machine the notebook runs on.
client = start_local_dask(
    n_workers=1,
    threads_per_worker=8,
    memory_limit='62GB',
)
# bare expression so the notebook displays the client summary
client

0,1
Client  Scheduler: tcp://127.0.0.1:40723  Dashboard: /user/emma.ai@ga.gov.au/proxy/8787/status,Cluster  Workers: 1  Cores: 8  Memory: 62.00 GB


In [None]:
# grid_list.csv contains a list of grids where the mangroves exist
# (no header row; read as strings so values such as "08" keep their leading zero)
grid_list = pd.read_csv("grid_list.csv", header=None, dtype="str")
for row in grid_list.iterrows():
    # iterate over all the grids; row is (index, Series of that grid's columns)
    print(row)
    x_dir = "x" + row[1][0]
    y_dir = "y" + row[1][1]
    bc_canopy = None
    for year in range(1987, 2022):
        # iterate over all the years
        print(year)
        # make up the prefix/path of mangroves imagery sitting in s3
        ac_prefix = prefix + "/".join([x_dir, y_dir, str(year) + "--P1Y"])
        page_iterator = paginator.paginate(Bucket=bucket, Prefix=ac_prefix)
        # filter the objects and get only .tif file (omit all the files of metadata);
        # ends_with avoids matching sidecar names that merely contain ".tif"
        filtered_iterator = page_iterator.search("Contents[?ends_with(Key, '.tif')].Key")
        # a page without Contents makes search() yield None, so drop those
        tif_keys = [o for o in filtered_iterator if o is not None]
        for o in tif_keys:
            print(o)
        if not tif_keys:
            # nothing for this year: do not reuse last year's raster, and reset
            # the baseline so a change is never computed across a gap
            print("no .tif found under " + ac_prefix + ", skipping")
            bc_canopy = None
            continue
        # read in the data as dask array (the last listed key is the one used)
        ac_canopy = rioxarray.open_rasterio("s3://" + "/".join([bucket, tif_keys[-1]]), chunks={"x": -1, "y": -1})
        if bc_canopy is not None:
            # compute the change
            canopy_change = categorize_damage(bc_canopy, ac_canopy)
            file_name = "ga_ls_mangrove_change_cyear_3_" + "_".join([x_dir + y_dir, str(year-1), str(year)]) + ".tif"
            key = "test/mangroves_change/" + "/".join([x_dir, y_dir, file_name])
            try:
                # save the data locally as geotiff
                write_cog(canopy_change.compute(), file_name)
                # upload the local file to s3 bucket
                s3_client.upload_file(file_name, bucket, key)
                print(file_name)
            finally:
                # remove the local file (too many files will use up the quota of sandbox),
                # even when writing or uploading failed
                if os.path.exists(file_name):
                    os.remove(file_name)
        bc_canopy = ac_canopy

(0, 0    08
1    26
Name: 0, dtype: object)
1987
derivative/ga_ls_mangrove_cover_cyear_3/2-0-2/x08/y26/1987--P1Y/ga_ls_mangrove_cover_cyear_3_x08y26_1987--P1Y_final_canopy_cover_class.tif
1988
derivative/ga_ls_mangrove_cover_cyear_3/2-0-2/x08/y26/1988--P1Y/ga_ls_mangrove_cover_cyear_3_x08y26_1988--P1Y_final_canopy_cover_class.tif
dask.array<zeros, shape=(1, 3200, 3200), dtype=uint8, chunksize=(1, 3200, 3200), chunktype=numpy.ndarray>
ga_ls_mangrove_change_cyear_3_x08y26_1987_1988.tif
1989
derivative/ga_ls_mangrove_cover_cyear_3/2-0-2/x08/y26/1989--P1Y/ga_ls_mangrove_cover_cyear_3_x08y26_1989--P1Y_final_canopy_cover_class.tif
dask.array<zeros, shape=(1, 3200, 3200), dtype=uint8, chunksize=(1, 3200, 3200), chunktype=numpy.ndarray>
ga_ls_mangrove_change_cyear_3_x08y26_1988_1989.tif
1990
derivative/ga_ls_mangrove_cover_cyear_3/2-0-2/x08/y26/1990--P1Y/ga_ls_mangrove_cover_cyear_3_x08y26_1990--P1Y_final_canopy_cover_class.tif
dask.array<zeros, shape=(1, 3200, 3200), dtype=uint8, chunksize=(1