In [None]:
import json
from shapely import geometry
import rioxarray as rioxr
import xarray as xr
from datacube.utils.dask import start_local_dask
import awswrangler as wr
import re

In [None]:
# Load the clipped grid as a GeoJSON FeatureCollection.
with open("au-grid-clipped.geojson", "r") as src:
    features = json.load(src)

# Tally how many features share each input_region_code.
region_code = {}
for feat in features['features']:
    code = feat['properties']['input_region_code']
    region_code[code] = region_code.get(code, 0) + 1

# Keep only features whose region code occurs at most 9 times.
unique_features = [
    feat
    for feat in features['features']
    if region_code[feat['properties']['input_region_code']] <= 9
]

# For region codes that occur more than once, drop features whose geometry
# area is below 1e-5; features with a unique region code are always kept.
# The area is only evaluated for repeated codes (short-circuit `and`).
valid_features = [
    feat
    for feat in unique_features
    if not (
        region_code[feat['properties']['input_region_code']] > 1
        and geometry.shape(feat['geometry']).area < 1e-5
    )
]

features['features'] = valid_features

# Persist the filtered collection for the following cells.
with open("au-grid-extended-clipped.geojson", "w") as dst:
    json.dump(features, dst)

In [None]:
# Re-read the filtered grid written by the previous cell.
with open("au-grid-extended-clipped.geojson", "r") as src:
    features = json.load(src)

# Map each feature's input_region_code to its method_region_code.
# If an input_region_code appears more than once, the last feature wins.
id_match = {
    feat['properties']['input_region_code']: feat['properties']['method_region_code']
    for feat in features['features']
}

In [None]:
# Start the local Dask client: a single worker with 60 threads and a 478GB
# memory limit. NOTE(review): these values look sized for one specific
# machine; adjust before running elsewhere.
client = start_local_dask(
    n_workers=1,
    threads_per_worker=60,
    memory_limit='478GB',
)
# Bare expression so the notebook displays the client summary.
client

In [None]:
prod_path = "s3://dea-public-data-dev/derivative/ga_ls8cls9c_gm_cyear_3/4-0-0"
test_path = "s3://dea-public-data-dev/test/gm-ls8-dilation-6-cloud-opening-3-v3/3-0-0"

# Tile at which the scan (re)starts; every tile before it in id_match is skipped.
start_tile = 'x59y54'


def _load_tile(base_path, tile_code, year=2015):
    """Open every GeoTIFF of one tile/year under ``base_path`` as one Dataset.

    Parameters
    ----------
    base_path : str
        S3 prefix of the product.
    tile_code : str
        Region code such as ``'x59y54'``; the first three characters and the
        remainder are used as two nested path components.
    year : int, optional
        Year used both in the ``<year>--P1Y`` path component and as the value
        written into the ``time`` coordinate. Defaults to 2015.

    Returns
    -------
    xarray.Dataset or None
        One variable per file, named by the text following ``P1Y_final_`` in
        the object key, with the ``band`` dimension/coordinate renamed to
        ``time``. ``None`` when no ``.tif`` objects are listed for the tile.

    Raises
    ------
    IndexError
        If an object key does not contain ``P1Y_final_<name>``.
    """
    prefix = "/".join([base_path, tile_code[0:3], tile_code[3:]]) + f"/{year}--P1Y"
    objs = wr.s3.list_objects(prefix, suffix=['tif'])
    if not objs:
        return None
    bands = []
    for o in objs:
        # chunks=... gives dask-backed arrays instead of loading eagerly.
        data = rioxr.open_rasterio(o, chunks={'x': 1600, 'y': 1600})
        data.name = re.findall(r'(?<=P1Y_final_)\w+', o)[0]
        bands.append(data)
    merged = xr.merge(bands)
    merged = merged.rename_dims({'band': 'time'})
    merged = merged.rename_vars({'band': 'time'})
    # Overwrite the band index carried over from the raster with the year.
    merged.time.data[0] = year
    return merged


checked = False
for key, val in id_match.items():
    # Skip tiles until start_tile is reached, then process every later tile.
    # Logical `and` replaces the original bitwise `&` on plain booleans.
    if (key != start_tile) and (not checked):
        continue
    checked = True
    print(f"tile {key}")

    # Production output is addressed by the input region code ...
    prod_set = _load_tile(prod_path, key)
    if prod_set is None:
        continue

    # ... and the test output by the matching method region code.
    test_set = _load_tile(test_path, val)
    if test_set is None:
        continue

    # Flag the tile when ANY pixel of any variable differs by more than 1e-4.
    # The original reduced with .min(), which only triggers when every pixel
    # of a variable differs, so a single matching pixel masked real differences.
    diff = abs(test_set - prod_set).max() > 1e-4
    if diff.to_array().any():
        print(f"found difference {key} : {val}")
        break

In [None]:
# Close the Dask client created earlier in the notebook.
client.close()