In [1]:
import os

# Later cells address config.yml, data/ and the src package relative to the
# working directory, so the notebook must run from the directory that holds them.
# The move is guarded so that re-running this cell does not climb one level
# further each time.
if not os.path.exists("config.yml"):
    os.chdir("../")

In [2]:
import pystac_client
import pystac
from requests.adapters import HTTPAdapter
from urllib3 import Retry
from pystac_client.stac_api_io import StacApiIO
import planetary_computer

import dask.distributed
import numpy as np
import rioxarray
import pandas as pd
import geopandas as gpd
from src.utils import search_s2_scenes, search_lc_scene, stack_s2_data, stack_lc_data, unique_class, missing_values, gen_chips
import yaml

In [3]:
import warnings

# Suppress every warning category for the rest of the session so the long
# processing loop's output stays readable.
warnings.simplefilter("ignore")

In [4]:
# Load the run configuration. The encoding is fixed so that parsing does not
# depend on the platform's locale default.
with open("config.yml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

In [5]:
# Load the areas of interest (AOIs) from the project's GeoJSON file.
aoi_gdf = gpd.read_file("data/map.geojson")

In [6]:
# AOIs at these index labels have broken scenes in the STAC catalog, so they
# are excluded before any processing starts.
BROKEN_AOI_INDICES = [12, 25, 46, 60, 81, 153]
aoi_gdf = aoi_gdf[~aoi_gdf.index.isin(BROKEN_AOI_INDICES)]

In [7]:
from dask.distributed import Client, LocalCluster
# Start a local Dask cluster with default sizing; pass e.g.
# n_workers=8, threads_per_worker=2 to LocalCluster to pin the resources.
cluster = LocalCluster()
client = Client(cluster)
# Show the dashboard URL so the run can be monitored.
print(client.dashboard_link)

http://127.0.0.1:8787/status




In [8]:
# Retry policy for STAC API requests: up to 10 attempts with backoff on
# 502/503/504 responses; allowed_methods=None retries on any HTTP method.
retry = Retry(
    total=10,
    backoff_factor=1,
    status_forcelist=[502, 503, 504],
    allowed_methods=None,
)
stac_api_io = StacApiIO(max_retries=retry)

# Open the Planetary Computer STAC catalog; every result is passed through
# planetary_computer.sign_inplace before it is returned.
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    stac_io=stac_api_io,
    modifier=planetary_computer.sign_inplace,
)

In [9]:
def _masked_window(stack, x_coords, y_coords, x, y, sample_size, epsg):
    """Slice one chip out of ``stack`` and set everything outside its central sample cell to NaN."""
    window = stack.isel(x=x_coords, y=y_coords)
    window.rio.write_crs(f"epsg:{epsg}", inplace=True)
    # Only pixels whose coordinates fall inside sample cell (y, x) keep their
    # value; the y comparisons are written for a descending y axis.
    return window.where(
        (window.x >= stack.x[x * sample_size])
        & (window.x < stack.x[(x + 1) * sample_size])
        & (window.y <= stack.y[y * sample_size])
        & (window.y > stack.y[(y + 1) * sample_size])
    )


def process_chips(s2_stack, lc_stack, epsg, sample_size, chip_size, global_index, metadata_df):
    """Generate chips for one AOI and record a metadata row for each chip written.

    The land-cover stack is coarsened into ``sample_size`` x ``sample_size``
    cells that are reduced with ``unique_class``. For every selected cell that
    is not on the border, a ``chip_size`` window centred on the cell is taken
    from both stacks, masked outside the cell, and handed to ``gen_chips``.

    Parameters
    ----------
    s2_stack, lc_stack
        Lazily evaluated Sentinel-2 and land-cover stacks; both must provide
        ``.compute()`` (presumably dask-backed xarray DataArrays with ``x``/``y``
        dimensions -- confirm against ``stack_s2_data`` / ``stack_lc_data``).
    epsg
        EPSG code written as the CRS of every chip and stored in the metadata.
    sample_size
        Side length, in pixels, of the coarsening cell.
    chip_size
        Side length, in pixels, of the extracted window; the window is the
        sample cell padded by ``(chip_size - sample_size) / 2`` on each side.
    global_index
        Id assigned to the next chip; incremented once per chip written.
    metadata_df
        Metadata table accumulated so far; new rows are placed in front of it.

    Returns
    -------
    tuple
        ``(global_index, metadata_df)`` after processing. Both are returned
        unchanged when either stack fails to compute.

    Raises
    ------
    ValueError
        If a land-cover window contains one of the values 255, 130 or 133.
    """
    # ``except Exception`` (rather than a bare ``except``) keeps the
    # best-effort skip while still letting Ctrl-C / kernel interrupts through.
    try:
        lc_stack = lc_stack.compute()
    except Exception:
        print("skipping the AOI for no LC data")
        return global_index, metadata_df

    lc_uniqueness = lc_stack.coarsen(
        x=sample_size, y=sample_size, boundary="trim"
    ).reduce(unique_class)
    # Border cells are discarded: their padded windows would reach past the
    # edge of the stack.
    lc_uniqueness[0, :] = False
    lc_uniqueness[-1, :] = False
    lc_uniqueness[:, 0] = False
    lc_uniqueness[:, -1] = False

    ys, xs = np.where(lc_uniqueness)
    print("Loading s2_stack")

    try:
        s2_stack = s2_stack.compute()
    except Exception:
        print("skipping the AOI for no S2 data")
        return global_index, metadata_df

    # Loop-invariant geometry.
    pad = int((chip_size - sample_size) / 2)
    half_sample = int(sample_size / 2)

    # Rows are collected here and merged into metadata_df once at the end,
    # instead of re-concatenating the whole table for every chip.
    new_rows = []

    for y, x in zip(ys, xs):
        x_coords = slice(x * sample_size - pad, (x + 1) * sample_size + pad)
        y_coords = slice(y * sample_size - pad, (y + 1) * sample_size + pad)

        s2_array = _masked_window(s2_stack, x_coords, y_coords, x, y, sample_size, epsg)
        if missing_values(s2_array, chip_size, sample_size):
            continue

        # Masked pixels become the nodata value -999 before the int16 cast.
        s2_array = s2_array.fillna(-999)
        s2_array = s2_array.rio.write_nodata(-999)
        s2_array = s2_array.astype(np.dtype(np.int16))
        s2_array = s2_array.rename("s2")

        lc_array = _masked_window(lc_stack, x_coords, y_coords, x, y, sample_size, epsg)
        if missing_values(lc_array, chip_size, sample_size):
            continue

        if (np.isin(lc_array, [255, 130, 133])).any():
            raise ValueError('Wrong LC value')

        # Masked pixels become the nodata value 0 before the int8 cast.
        lc_array = lc_array.fillna(0)
        lc_array = lc_array.rio.write_nodata(0)
        lc_array = lc_array.astype(np.dtype(np.int8))
        lc_array = lc_array.rename("lc")

        gen_status, dts = gen_chips(s2_array, lc_array, global_index)
        if gen_status:
            new_rows.append([
                global_index,
                dts,
                # NOTE(review): index 1 assumes the sorted unique values are
                # [0 (nodata fill), class]; this relies on chip_size > sample_size
                # and on class codes being positive after the int8 cast -- confirm.
                np.unique(lc_array)[1],
                s2_stack.x[x * sample_size + half_sample].data,
                s2_stack.y[y * sample_size + half_sample].data,
                epsg,
            ])
            global_index += 1

    if new_rows:
        # Newest chip first, followed by the existing table: the same ordering
        # that prepending one row at a time produced.
        metadata_df = pd.concat(
            [pd.DataFrame(new_rows[::-1], columns=metadata_df.columns), metadata_df],
            ignore_index=True,
        )

    return global_index, metadata_df

In [10]:
# Column layout of the chip metadata table, one row per generated chip.
METADATA_COLUMNS = ["chip_id", "dates", "lc", "x_center", "y_center", "epsg"]

# Running chip id and the (initially empty) metadata table that the
# processing loop keeps extending.
global_index = 0
metadata_df = pd.DataFrame(columns=METADATA_COLUMNS)

In [None]:
# Build chips AOI by AOI. An AOI is skipped as soon as one of its inputs turns
# out to be unavailable; the metadata table is saved after every processed AOI.
for index, aoi in aoi_gdf.iterrows():
    print(f"\nProcessing AOI at index {index}")

    aoi_bounds = aoi['geometry'].bounds

    # Gather the Sentinel-2 scenes found for every configured time range.
    s2_items = pystac.item_collection.ItemCollection([])
    for date_range in config["sentinel_2"]["time_ranges"]:
        s2_items_season = search_s2_scenes(aoi, date_range, catalog, config)
        s2_items += s2_items_season

    # NOTE(review): 4 presumably matches the number of configured time ranges
    # (one scene each) -- confirm against config.yml.
    if len(s2_items) < 4:
        print(f"Missing Sentinel-2 scenes for AOI {aoi_bounds}")
        continue

    s2_stack = stack_s2_data(s2_items, config)
    if s2_stack is None:
        print(f"Failed to stack Sentinel-2 bands for AOI {aoi_bounds}")
        continue

    # Take the EPSG code from the first scene: use "proj:epsg" when present,
    # otherwise parse it from "proj:code" (e.g. "EPSG:32633"). An explicit
    # lookup replaces the former bare ``except`` so that unrelated errors and
    # kernel interrupts are no longer swallowed here.
    first_item_props = s2_items[0].properties
    epsg = first_item_props.get("proj:epsg")
    if epsg is None:
        epsg = int(first_item_props["proj:code"].split(":")[-1])

    lc_items = search_lc_scene(s2_items[0].bbox, catalog, config)
    if not lc_items:
        print(f"No Land Cover data found for AOI {aoi_bounds}")
        continue

    lc_stack = stack_lc_data(lc_items, s2_stack.rio.crs.to_epsg(), s2_items[0].bbox, config)
    if lc_stack is None:
        # The land-cover search is not tied to a date range, so the message no
        # longer prints the leftover ``date_range`` of the Sentinel-2 loop.
        print(f"Failed to stack Land Cover data for AOI {aoi_bounds}")
        continue

    global_index, metadata_df = process_chips(s2_stack,
                                              lc_stack,
                                              epsg,
                                              config["chips"]["sample_size"],
                                              config["chips"]["chip_size"],
                                              global_index,
                                              metadata_df)
    # Checkpoint after every AOI so an interrupted run keeps its metadata.
    # NOTE(review): absolute path, unlike the other inputs, which are addressed
    # relative to the working directory.
    metadata_df.to_csv('/home/benchuser/data/metadata_df.csv', index=False)


Processing AOI at index 0
Loading s2_stack

Processing AOI at index 1
Loading s2_stack

Processing AOI at index 2
Missing Sentinel-2 scenes for AOI (8.281784468562478, 7.210997528023341, 8.304940369139388, 7.239302354410327)

Processing AOI at index 3
Loading s2_stack

Processing AOI at index 4
Loading s2_stack

Processing AOI at index 5
Loading s2_stack

Processing AOI at index 6
Missing Sentinel-2 scenes for AOI (50.41217017222479, 29.404897305042425, 50.710146229577674, 29.642040867604592)

Processing AOI at index 7
Missing Sentinel-2 scenes for AOI (79.19574903465758, 24.58560511266596, 79.26275151429297, 24.635076317899106)

Processing AOI at index 8
Missing Sentinel-2 scenes for AOI (93.39743605894972, 23.10213901480563, 93.69421382167826, 23.297442448554904)

Processing AOI at index 9
Missing Sentinel-2 scenes for AOI (112.7443971126383, -0.2287540882034449, 112.8611036002244, -0.11674605820823558)

Processing AOI at index 10
Missing Sentinel-2 scenes for AOI (140.3603063903077

  return func(*args, **kwargs)
  return func(*args, **kwargs)


Loading s2_stack

Processing AOI at index 15
Missing Sentinel-2 scenes for AOI (8.287660273469072, 52.97498481955799, 8.502031688629586, 53.07649549856325)

Processing AOI at index 16
Loading s2_stack

Processing AOI at index 17
Missing Sentinel-2 scenes for AOI (-6.211705149471527, 9.994187569998957, -6.1415481082109125, 10.05319589518021)

Processing AOI at index 18
Loading s2_stack

Processing AOI at index 19
Loading s2_stack

Processing AOI at index 20
Loading s2_stack


2025-02-23 21:11:22,101 - distributed.worker - ERROR - Compute Failed
Key:       ('fetch_raster_window-24c09655d0ed1c3072d930edfadc20eb', 0, 2, 2, 5)
State:     executing
Function:  subgraph_callable-79ef2ab250881ecd3960e9e8b9c6d589
args:      (array([[(<stackstac.rio_reader.AutoParallelRioReader object at 0x7f809857e7b0>, Window(col_off=7.0, row_off=9.0, width=10980.0, height=10980.0))]],
      dtype=object), (slice(2048, 3072, None), slice(5120, 6144, None)), dtype('float64'), nan)
kwargs:    {}
Exception: 'RuntimeError("Error reading Window(col_off=5120, row_off=2048, width=1024, height=1024) from \'https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/20/M/PC/2023/03/31/S2B_MSIL2A_20230331T142719_N0509_R053_T20MPC_20230331T202754.SAFE/GRANULE/L2A_T20MPC_A031682_20230331T142715/IMG_DATA/R10m/T20MPC_20230331T142719_B04_10m.tif?st=2025-02-22T20%3A24%3A03Z&se=2025-02-23T21%3A09%3A03Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2

skipping the AOI for no S2 data

Processing AOI at index 21
Missing Sentinel-2 scenes for AOI (-40.815038483476656, -11.83788851475856, -40.391761022596285, -11.575795737251639)

Processing AOI at index 22


  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
