In [None]:
import geopandas as gpd
from ipyfilechooser import FileChooser
from pathlib import Path
import rioxarray as rxr
import rioxarray.merge
from tqdm.auto import tqdm

import s3fs
from shapely.geometry import mapping

### Select the chip shape file

In [None]:
# Start browsing from the notebook's working directory; the selected path is
# read back from `fc.selected` in the next cell.
start_dir = Path.cwd()
fc = FileChooser(start_dir)

print("Select the chip shape file generated in Notebook 0")
display(fc)

### Load the chip geometry from the chip shapefile

In [None]:
# `fc.selected` is None until a file has been chosen in the widget above;
# fail with an actionable message instead of an opaque TypeError from Path(None).
if fc.selected is None:
    raise ValueError(
        "No file selected: pick the chip shape file in the file chooser above, "
        "then re-run this cell."
    )

shape_path = Path(fc.selected)
shape_path

In [None]:
geometry_gdf = gpd.read_file(shape_path)
if geometry_gdf.empty:
    raise ValueError(f"No features found in {shape_path}")

# The clip step further down declares these geometries as 'EPSG:4326', and the
# tile-grid intersection compares them against a GeoJSON grid (presumably
# lon/lat). Reproject here when the file declares a different CRS so both
# steps see consistent coordinates. A file without CRS metadata is left as-is.
if geometry_gdf.crs is not None and geometry_gdf.crs != "EPSG:4326":
    geometry_gdf = geometry_gdf.to_crs("EPSG:4326")

# Positional access: take the first feature regardless of index labels.
geometry = geometry_gdf['geometry'].iloc[0]
geometry

### Collect the chip directory path and chip ID

In [None]:
# The chip directory is the folder holding the selected shape file.
chip_dir = shape_path.parent
# The chip ID is whatever follows the last underscore in the directory name
# (the whole name when it contains no underscore).
chip_id = chip_dir.name.rsplit('_', 1)[-1]
chip_dir

### Download all ESA WorldCover tiles intersecting the chip

In [None]:
bucket = "s3://esa-worldcover/v100/2020"
s3 = s3fs.S3FileSystem(anon=True)

# Read the tile grid through the anonymous s3fs filesystem so the request does
# not depend on ambient AWS credentials / GDAL S3 configuration.
geojson = f'{bucket}/esa_worldcover_2020_grid.geojson'
with s3.open(geojson, 'rb') as grid_file:
    grid = gpd.read_file(grid_file)

# Keep only the grid cells that intersect the chip geometry.
tiles = grid[grid.intersects(geometry)]
if tiles.empty:
    # Without this, nothing is downloaded and the merge step fails later
    # with a much less helpful error.
    raise ValueError("The chip geometry does not intersect any ESA WorldCover tile")

# Download each intersecting tile into the chip directory, keeping its
# original file name.
for tile in tiles.ll_tile:
    tile_name = f"ESA_WorldCover_10m_2020_v100_{tile}_Map.tif"
    tile_path = f"{bucket}/map/{tile_name}"
    output_path = str(chip_dir / tile_name)
    s3.download(tile_path, output_path)

### Clip tiles to chip geometry

In [None]:
# Match only the downloaded source tiles ("..._<tile>_Map.tif") that sit
# directly in the chip directory. A looser "..._v100*.tif" pattern also matches
# the merged product ("..._v100_chip_<id>.tif") left by a previous run, which
# would then be merged into itself and removed by the cleanup cell.
tile_paths = list(chip_dir.glob("ESA_WorldCover_10m_2020_v100_*_Map.tif"))

# The clip geometries are the same for every tile, so build them once.
clip_shapes = geometry_gdf.geometry.apply(mapping)

to_merge = list()
for tile_path in tqdm(tile_paths):
    # squeeze() drops length-1 dimensions from the opened raster.
    tile = rxr.open_rasterio(tile_path, masked=True).squeeze()
    # The second argument tells rioxarray which CRS the clip geometries are in.
    tile = tile.rio.clip(clip_shapes, 'EPSG:4326')
    to_merge.append(tile)

### Merge the clipped tiles and write to GeoTIFF

In [None]:
# Destination of the chip-level product, stored next to the chip shape file.
merged_path = chip_dir / f"ESA_WorldCover_10m_2020_v100_chip_{chip_id}.tif"

# Combine the clipped tiles into a single array and write it to disk.
merged = rxr.merge.merge_arrays(to_merge)
merged.rio.to_raster(merged_path)

### Delete intermediary files

In [None]:
for p in tile_paths:
    # Only remove downloaded source tiles ("..._Map.tif"). The merged chip
    # product shares the same file-name prefix and must never be deleted here.
    if not p.name.endswith("_Map.tif"):
        continue
    try:
        p.unlink()
    except FileNotFoundError:
        # Already removed (e.g. the cell was run twice) — nothing to do.
        pass