In [1]:
import dask.distributed
import pystac_client
import planetary_computer
import stackstac 
import numpy as np
import pandas as pd
import rioxarray

from utils import gen_chips, get_count

# Open the Planetary Computer STAC API. `planetary_computer.sign_inplace` is
# handed to the client as its result modifier.
STAC_API_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
catalog = pystac_client.Client.open(STAC_API_URL, modifier=planetary_computer.sign_inplace)

In [2]:
from dask.distributed import Client, LocalCluster

# Start a local Dask cluster, attach a client to it, and print the dashboard
# URL so task progress can be followed in the browser.
cluster = LocalCluster()
client = Client(cluster)
print(client.dashboard_link)

http://127.0.0.1:8787/status


In [3]:
# LC Search
# The date window lies strictly inside 2023, so this only returns 2023 tiles.
lc_search = catalog.search(
    collections=["io-lulc-annual-v02"],
    datetime="2023-01-02/2023-12-30",
)
lc_items = lc_search.item_collection()
print(f"Returned {len(lc_items)} Items")

Returned 756 Items


In [4]:
# Sanity check: list the distinct `end_datetime` values across the returned
# land-cover items (one single-column record per item).
end_datetime_records = [[item.properties["end_datetime"]] for item in lc_items]
date_times = pd.DataFrame.from_records(end_datetime_records)
date_times.iloc[:, 0].unique()

array(['2024-01-01T00:00:00Z'], dtype=object)

In [None]:
# Chip generation
# ---------------
# For a hand-picked set of land-cover (LC) items: build a low-cloud Sentinel-2
# median composite over the LC item's bounding box, scan the LC raster window
# by window for `sample_size` x `sample_size` cells whose `get_count` value is
# 1, and export a `chip_size` x `chip_size` S2/LC chip pair centred on each
# such cell. Pixels outside the central cell are replaced by the nodata value.
#
# NOTE(review): the same integer pixel offsets are used to index both the S2
# composite and the LC raster, which assumes both stacks share one grid
# (same CRS, 10 m resolution, same bounds) -- confirm for new items.
# NOTE(review): axis handling below assumes stackstac's (time, band, y, x)
# dimension order, i.e. (band, y, x) for the composite and (y, x) for LC.
s2_assets = ["B02", "B03", "B04", "B08", "B11", "B12"]
chip_size = 224        # edge length, in pixels, of every exported chip
sample_size = 100      # edge length, in pixels, of the LC cell passed to get_count
window_size = 2000     # edge length, in pixels, of one scan window (also the dask chunk size)
window_start = 10000   # first pixel offset scanned along x and along y
edge_margin = 500      # no window starts within this many pixels of the far edge
max_windows_per_item = None  # None = scan every window; set to 1 for a quick smoke test
chip_pad = int((chip_size - sample_size) / 2)  # context pixels added on each side of a cell

lc_item_ids = [145, 158, 200, 384, 379, 387, 399, 507, 532, 588, 601, 615, 628, 640, 674]
metadata_columns = ["chip_id", "lc", "tlc_x", "tlc_y", "epsg"]
metadata_df = pd.DataFrame(columns=metadata_columns)
# Rows are collected here and turned into `metadata_df` once, after the loop;
# if the loop is interrupted the partial records are still available.
metadata_records = []


def _mask_outside_cell(chip, grid, x0, x1, y0, y1):
    """Return `chip` with every pixel outside the sample cell set to NaN.

    The cell is described by pixel indices into `grid`: a pixel is kept when
    its x coordinate is >= grid.x[x0] and < grid.x[x1], and its y coordinate
    is <= grid.y[y0] and > grid.y[y1] (y is expected to decrease with the
    pixel index -- TODO confirm for non north-up grids).
    """
    inside_cell = (
        (chip.x >= grid.x[x0]) & (chip.x < grid.x[x1])
        & (chip.y <= grid.y[y0]) & (chip.y > grid.y[y1])
    )
    return chip.where(inside_cell)


global_index = 0
for item_id in lc_item_ids:
    lc_item = lc_items[item_id]
    # Resolve the EPSG code once. Building the CRS string here also avoids
    # nesting double quotes inside an f-string, which only parses on
    # Python >= 3.12.
    epsg = lc_item.properties["proj:epsg"]
    crs = f"epsg:{epsg}"

    s2_search = catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=lc_item.bbox,
        datetime="2023-02-01/2023-08-30",
        query=["eo:cloud_cover<1"],
        # sortby=["+properties.eo:cloud_cover"],
        max_items=50,
    )
    s2_items = s2_search.item_collection()
    if len(s2_items) == 0:
        # Nothing to composite for this tile; stacking an empty collection would fail.
        print(f"Item {item_id}: no Sentinel-2 scenes matched the search, skipping")
        continue

    s2_stack = stackstac.stack(
        s2_items,
        assets=s2_assets,
        epsg=epsg,
        resolution=10,
        bounds_latlon=lc_item.bbox,
    )
    # Per-band median over time, re-chunked so that one scan window maps onto
    # one dask chunk per band.
    s2_stack_resampled = s2_stack.median("time", skipna=True).squeeze()
    s2_stack_resampled = s2_stack_resampled.chunk(
        chunks={"band": 1, "x": window_size, "y": window_size}
    )

    lc_stack = stackstac.stack(
        lc_item,
        dtype=np.ubyte,
        fill_value=255,
        sortby_date=False,
        bounds_latlon=lc_item.bbox,
    ).squeeze()

    # Look the axis lengths up by name instead of by position so x offsets are
    # bounded by the x size and y offsets by the y size. Taking the minimum of
    # both rasters guarantees every accepted chip fits in each of them.
    n_x = min(s2_stack_resampled.sizes["x"], lc_stack.sizes["x"])
    n_y = min(s2_stack_resampled.sizes["y"], lc_stack.sizes["y"])

    # (i, j) is the pixel offset of a window's first column / first row.
    windows = [
        (i, j)
        for i in range(window_start, n_x - edge_margin, window_size)
        for j in range(window_start, n_y - edge_margin, window_size)
    ]

    for i, j in windows[:max_windows_per_item]:
        # One window_size x window_size window: x is offset by i, y by j.
        sub_lc_stack = lc_stack.isel(
            x=slice(i, i + window_size), y=slice(j, j + window_size)
        )

        lc_coarsen = sub_lc_stack.coarsen(x=sample_size, y=sample_size, boundary="trim")
        lc_count = lc_coarsen.reduce(get_count)
        # Put x first explicitly so np.where returns (x cell, y cell) pairs
        # regardless of the raster's own dimension order.
        x_cells, y_cells = np.where(lc_count.transpose("x", "y").values == 1)

        for ii, jj in zip(x_cells, y_cells):
            # Pixel bounds of the sample cell inside the full tile.
            x0 = i + int(ii) * sample_size
            x1 = x0 + sample_size
            y0 = j + int(jj) * sample_size
            y1 = y0 + sample_size

            # Skip cells whose padded chip would stick out of the tile:
            # slicing would silently return a smaller chip and the coordinate
            # look-ups at x1 / y1 could run past the end of the axis.
            if (
                x0 - chip_pad < 0
                or y0 - chip_pad < 0
                or x1 + chip_pad > n_x
                or y1 + chip_pad > n_y
            ):
                continue

            x_coords = slice(x0 - chip_pad, x1 + chip_pad)
            y_coords = slice(y0 - chip_pad, y1 + chip_pad)

            s2_array = s2_stack_resampled.isel(x=x_coords, y=y_coords).compute()
            s2_array.rio.write_crs(crs, inplace=True)
            s2_array = _mask_outside_cell(s2_array, s2_stack_resampled, x0, x1, y0, y1)
            s2_array = s2_array.fillna(-999)
            s2_array = s2_array.rio.write_nodata(-999)
            s2_array = s2_array.astype(np.dtype(np.int16))
            s2_array = s2_array.rename("s2")

            lc_array = lc_stack.isel(x=x_coords, y=y_coords).compute()
            lc_array.rio.write_crs(crs, inplace=True)
            lc_array = _mask_outside_cell(lc_array, lc_stack, x0, x1, y0, y1)
            # Take the label while the pixels outside the cell are still NaN;
            # once they are filled with -99 they would be averaged into it.
            lc_label = lc_array.mean(skipna=True).item()
            lc_array = lc_array.fillna(-99)
            lc_array = lc_array.rio.write_nodata(-99)
            lc_array = lc_array.astype(np.dtype(np.int8))
            lc_array = lc_array.rename("lc")

            gen_chips(s2_array, lc_array, global_index)

            metadata_records.append(
                [
                    global_index,
                    lc_label,
                    # Coordinates of the sample cell's first pixel (x0, y0).
                    s2_stack_resampled.x[x0].item(),
                    s2_stack_resampled.y[y0].item(),
                    epsg,
                ]
            )
            global_index += 1

# Build the table in one go instead of concatenating inside the loop.
# Rows are ordered with the most recently generated chip first.
metadata_df = pd.DataFrame(metadata_records[::-1], columns=metadata_columns)


In [None]:
# import leafmap
# m = leafmap.Map(center=[13.85, 100.15], zoom=8, height="400px")
# m

In [None]:
# import matplotlib.pyplot as plt
# plt.imshow(s2_array.isel(band=1))

In [None]:
# polygon = m.user_rois['features'][0]['geometry']
# bbox = shapely.geometry.Polygon(polygon["coordinates"][0]).bounds
# bbox = (-56.863861, -0.637333, -56.477966, -0.358566)