In [1]:
from osgeo import ogr, gdal, osr
import pandas as pd
import shapely.wkt
import fiona
import json
import os
import base64
import numpy as np
from pathlib import Path
import boto3
from datacube import Datacube
from datacube.utils.geometry import CRS, Geometry, GeoBox
from datacube.testutils.io import native_geobox, native_load
from datacube.utils.dask import start_local_dask
import rioxarray as rioxr
import awswrangler as wr
from datetime import timedelta, datetime
import dask.dataframe as dd
import matplotlib.pyplot as plt
import re

from datacube.utils.masking import make_mask

s3_obj = boto3.client('s3')
dc = Datacube()



In [2]:
ls98_mangroves_paths = wr.s3.list_objects("s3://dea-public-data-dev/test/derivative/ga_ls_mangrove_cover_cyear_3/4-0-1/", suffix=[".json"])
ls8_mangroves_paths = wr.s3.list_objects("s3://dea-public-data-dev/test/derivative/ga_ls_mangrove_cover_cyear_3/4-0-1/", suffix=[".json"])

In [3]:
len(ls98_mangroves_paths), len(ls8_mangroves_paths)

(155, 155)

In [4]:
ls98_mangroves_paths_2022 = [x for x in ls98_mangroves_paths if '2022--P1Y' in x]
ls8_mangroves_paths_2022 = [x for x in ls8_mangroves_paths if '2022--P1Y' in x]

In [5]:
pattern = r'\/x\d+\/y\d+\/'

def tile_ids_from_path(ds_paths):
    tiles = []
    
    for s in ds_paths:
        # Search for the pattern in the string
        match = re.search(pattern, s)

        if match:
            tile_id = match.group(0).replace("/","")
            tiles.append(tile_id)
    return tiles

In [6]:
ls98_ds_2022 = tile_ids_from_path(ls98_mangroves_paths_2022)
ls8_ds_2022 = tile_ids_from_path(ls8_mangroves_paths_2022)

In [7]:
cmn_tiles_2022 = set(ls98_ds_2022).intersection(set(ls8_ds_2022))

In [8]:
def native_load_dss(dss, base_ds, measurements=None):
    geobox = native_geobox(
            base_ds, basis=list(dss[0].measurements.keys())[0]
        )
    mm = dss[0].type.lookup_measurements(measurements)
    tmp_data = dc.load_data(dc.group_datasets(dss, 'time'),
                    geobox, measurements=mm, dask_chunks={'time': 1, 'x': -1, 'y': -1})
        
    return tmp_data

In [9]:
    
    # ls89_ds = wr.s3.list_objects("s3://dea-public-data-dev/test/derivative/ga_ls_mangrove_cover_cyear_3/4-0-1/"+
    #                              a[:3]+"/"+a[3:]+f"/{year}--P1Y", suffix=[".json"])[0]
    
    # s3_clientobj_ls89 = s3_obj.get_object(Bucket='dea-public-data-dev', Key=ls89_ds.removeprefix("s3://dea-public-data-dev/"))
    # s3_clientdata_ls89 = s3_clientobj_ls89['Body'].read().decode('utf-8')

    #     load_path_ls89 = json.loads(s3_clientdata_ls89)["assets"]['canopy_cover_class']['href']                                       
    #     ls89_mangroves_data = rioxr.open_rasterio(load_path_ls89)     
    #     ls89_mangroves_data = ls89_mangroves_data.where(ls89_mangroves_data[0] != 255)

    # , ls89_mangroves_data[ls89_mangroves_data > 0],   
        # diff = np.where(np.isnan(ls8_mangroves_data) | np.isnan(ls89_mangroves_data), np.nan, np.subtract(ls8_mangroves_data, ls89_mangroves_data))
        # # diff = ls8_mangroves_data - ls89_mangroves_data
        # diff = abs(diff[~np.isnan(diff)])

In [10]:
year = 2022

from datacube.utils import masking
import dask.array as da
from odc.algo import enum_to_bool, erase_bad
import multiprocessing

# Define a function to compute the diff for a single tile
def run_mangroves(a, year, path):    
    ds = wr.s3.list_objects(path+
                            a[0:3]+"/"+a[3:]+f"/{year}--P1Y", suffix=[".json"])[0]    

    s3_clientobj = s3_obj.get_object(Bucket='dea-public-data-dev', Key=ds.removeprefix("s3://dea-public-data-dev/"))
    s3_clientdata = s3_clientobj['Body'].read().decode('utf-8')    

    load_path = json.loads(s3_clientdata)["assets"]['canopy_cover_class']['href']                                           
    mangroves_data = rioxr.open_rasterio(load_path) 
    mangroves_data = mangroves_data.where(mangroves_data[0] != 255).values
    return mangroves_data
    # if ~np.isnan(mangroves_data[mangroves_data >= 0]).all():            
    #     return mangroves_data[mangroves_data > 0
    

def run_fcp_cmp(path):
    # Specify the number of cores
    
    num_cores = multiprocessing.cpu_count()
    print(num_cores)
    num_cores = 1
    # Create a multiprocessing pool with specified number of processes
    pool = multiprocessing.Pool(processes=num_cores)

    results = {}
    nomatch_count = 0

    
    mangroves = []    

    # Use multiprocessing to compute diffs for each tile in parallel
    for a in cmn_tiles_2022:    
        mangroves.append(pool.apply_async(run_mangroves, (a, 2022, path)))        
        
    # Close the multiprocessing pool
    mangroves = [ds.get() for ds in mangroves]    
    
    pool.close()
    pool.join()
    return mangroves

In [11]:
ls8_mangroves_results = run_fcp_cmp('s3://dea-public-data-dev/test/derivative/ga_ls_mangrove_cover_cyear_3/4-0-0/')

96


In [12]:
ls89_mangroves_results = run_fcp_cmp('s3://dea-public-data-dev/test/derivative/ga_ls_mangrove_cover_cyear_3/4-0-1/')

96


In [13]:
ls8_mangroves_results_flat = np.concatenate(ls8_mangroves_results)
ls89_mangroves_results_flat = np.concatenate(ls89_mangroves_results)

In [14]:
def mangroves_area(results):    
    return np.sum(results) * 30**2    

In [15]:
total_area_ls8 = mangroves_area(ls8_mangroves_results_flat[ls8_mangroves_results_flat >= 0])
total_area_ls89 = mangroves_area(ls89_mangroves_results_flat[ls89_mangroves_results_flat >= 0])
print("Total difference in mangroves expected area (km, ls8-ls89):", (total_area_ls8-total_area_ls89)/1e+3)

Total difference in mangroves expected area (km, ls8-ls89): -142731.0


In [16]:
ls8_total_detected_mangroves_area = mangroves_area(ls8_mangroves_results_flat[ls8_mangroves_results_flat > 0])
ls89_total_detected_mangroves_area = mangroves_area(ls89_mangroves_results_flat[ls89_mangroves_results_flat > 0])                                          
print(f"Total percentage change {(ls8_total_detected_mangroves_area - ls89_total_detected_mangroves_area)/1e+3}")
 
for cls_value,cls_name in {0:'NotObserved', 1:'Woodland', 2:'OpenForest', 3:'ClosedForest'}.items():
    ls8 = mangroves_area(ls8_mangroves_results_flat[ls8_mangroves_results_flat == cls_value])
    ls89 = mangroves_area(ls89_mangroves_results_flat[ls89_mangroves_results_flat == cls_value])
    print(f'Percentage difference in {cls_name} ls8-ls89 {100*((ls8/total_area_ls8) - (ls89/total_area_ls89))}')

Total percentage change -142756.2
Percentage difference in NotObserved ls8-ls89 0.0
Percentage difference in Woodland ls8-ls89 -0.1723514986645372
Percentage difference in OpenForest ls8-ls89 -0.7419982011219461
Percentage difference in ClosedForest ls8-ls89 0.9143870169670665
