In [1]:
import leafmap
import geopandas as gpd
from geospatial_tools import DATA_DIR
from geospatial_tools.planetary_computer.sentinel_2 import BestProductsForFeatures, download_and_process_sentinel2_asset
from geospatial_tools.raster import clip_raster_with_polygon
from geospatial_tools.stac import Asset
from geospatial_tools.utils import get_yaml_config, download_url, unzip_file
from geospatial_tools.vector import create_vector_grid_parallel, to_geopackage, select_polygons_by_location

## Base data

The USA polygon is based on 2018's `cb_2018_us_nation_20m` shapefile, taken from here: 
https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html

It was then processed using QGIS to keep only the contiguous states, without any islands.

The Sentinel 2 grid was taken from the kml file found here: 
https://sentiwiki.copernicus.eu/web/s2-products

Below is some code to help with the download part.

### Downloading data
Let's download our source data

In [2]:
# Look up the download URLs in the "data_file_links" config
file_configs = get_yaml_config("data_file_links")
raw_usa_polygon_path = file_configs["united_states_polygon"]["url"]
raw_s2_tiling_grid_path = file_configs["sentinel_2_tiling_grid"]["url"]

# Archive name (without extension) -> source URL
download_list = {
    "raw_usa_polygon": raw_usa_polygon_path,
    "raw_s2_tiling_grid": raw_s2_tiling_grid_path,
}

# Each archive is saved as <DATA_DIR>/<archive name>.zip
file_list = [
    download_url(url=source_url, filename=f"{DATA_DIR}/{archive_name}.zip")
    for archive_name, source_url in download_list.items()
]

file_list



[PosixPath('/home/francispelletier/projects/geospatial_tools/data/raw_usa_polygon.zip'),
 PosixPath('/home/francispelletier/projects/geospatial_tools/data/raw_s2_tiling_grid.zip')]

In [3]:
# Extract every downloaded archive into DATA_DIR; the cell output lists the extracted files
[unzip_file(zip_path=archive, extract_to=DATA_DIR) for archive in file_list]

[['/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.shp.ea.iso.xml',
  '/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.shp.iso.xml',
  '/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.shp',
  '/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.shx',
  '/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.dbf',
  '/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.prj',
  '/home/francispelletier/projects/geospatial_tools/data/cb_2018_us_nation_20m.cpg'],
 ['/home/francispelletier/projects/geospatial_tools/data/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml']]

### Initial pre-processing

The above layers were processed using QGIS.

For the purpose of this analysis, only the contiguous lower 48 states have been conserved; smaller islands/land masses 
have also been stripped.

The S2 tiling grid has been trimmed to keep only the grid cells that overlap with the 
contiguous states.

Since our area of study is quite large, the `EPSG:5070` projection was chosen, as it
covers the whole area and introduces minimal distortion while preserving area.

The files below have also been saved in this repository.

In [4]:
# Pre-processed USA polygon layer
USA_POLYGON_FILE = DATA_DIR / "usa_polygon_5070.gpkg"
usa_polygon = gpd.read_file(USA_POLYGON_FILE)

# Pre-processed Sentinel 2 tiling grid layer
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"
s2_grid = gpd.read_file(S2_USA_GRID_FILE)

In [5]:
# Inspect the USA polygon layer read from USA_POLYGON_FILE
usa_polygon

Unnamed: 0,AFFGEOID,GEOID,NAME,geometry
0,0100000US,US,United States,"MULTIPOLYGON (((-2116048.733 3142966.552, -211..."


In [6]:
# Inspect the Sentinel 2 tiling grid read from S2_USA_GRID_FILE
s2_grid

Unnamed: 0,name,folders,description,geometry
0,12TUP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1386334.944 2487548.77 0, -..."
1,12TYQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-976300.478 2523767.452 0, -..."
2,12TYR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-960099.705 2622374.255 0, -..."
3,12TYN,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1008622.024 2325748.358 0, ..."
4,12TYP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-992478.385 2424861.34 0, -8..."
...,...,...,...,...
977,12TTM,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1515431.586 2304192.826 0, ..."
978,12TUK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1448525.813 2089886.667 0, ..."
979,12TUQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1371006.917 2586590.133 0, ..."
980,12TUR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1355793.563 2685354.08 0, -..."


In [7]:
# Overview map centered on the contiguous USA
m = leafmap.Map(center=[40, -98], zoom=4)

# USA polygon, drawn with the default style (blue)
m.add_gdf(usa_polygon, layer_name="usa")
# Sentinel 2 tiling grid, drawn in red
m.add_gdf(s2_grid, layer_name="s2_grid", style={"color": "red"})

m

Map(center=[40, -98], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_te…

## Creating our grid

From this, we want to create a grid of square polygons with which we will later on
query the [Planetary Computer](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a)
Sentinel 2 dataset and clip the selected Sentinel 2 images.

For the purpose of this notebook, the grid that will be created will use 5km by 5km squares
(`grid_size = 5000`) to speed up processing.

In [8]:
# Extent of the grid: bounding box of the USA polygon
bbox = usa_polygon.total_bounds
# Side length of each square cell, in CRS units (presumably metres for EPSG:5070 — confirm)
grid_size = 5_000
grid_5km_filename = DATA_DIR / "polygon_grid_5km.gpkg"

In [9]:
# Build the square polygon grid over the bounding box of the USA polygon.
print("Starting processing for [create_vector_grid_parallel]")
grid_5km = create_vector_grid_parallel(bounding_box=bbox, grid_size=grid_size, crs="EPSG:5070")
# Sanity check: report how many grid cells were generated
print(f"Number of polygons in grid_5km : {len(grid_5km)}")
# Persist the grid; the path returned by to_geopackage is shown as the cell output
to_geopackage(gdf=grid_5km, filename=grid_5km_filename)

PosixPath('/home/francispelletier/projects/geospatial_tools/data/polygon_grid_5km.gpkg')

In [10]:
# Inspect the generated grid (geometry plus a `feature_id` per cell)
grid_5km

Unnamed: 0,geometry,feature_id
0,"POLYGON ((-2356113.743 301469.316, -2351113.74...",85093ed6-a3e6-4e93-adc8-e94d931e4a4b
1,"POLYGON ((-2351113.743 301469.316, -2346113.74...",f9cf7576-f5d4-42a9-8c81-5dd99b4a98e8
2,"POLYGON ((-2346113.743 301469.316, -2341113.74...",95fd96bf-5803-429a-9f5e-a01f3d86e938
3,"POLYGON ((-2341113.743 301469.316, -2336113.74...",8fb98f6b-e669-4f46-a917-75720bff49e2
4,"POLYGON ((-2336113.743 301469.316, -2331113.74...",797430df-c0e5-48b4-a0bd-677d989773ab
...,...,...
528874,"POLYGON ((2233886.257 3161469.316, 2238886.257...",59daf2ca-7350-464f-924d-44b5f176e715
528875,"POLYGON ((2238886.257 3161469.316, 2243886.257...",9d59463b-1f0f-4e4f-9ea8-ef41d0a81e4f
528876,"POLYGON ((2243886.257 3161469.316, 2248886.257...",37bb15c2-06fc-4fae-9454-315bdb40d315
528877,"POLYGON ((2248886.257 3161469.316, 2253886.257...",abed9791-a490-40b7-8511-2e7f457ad192


### Selecting the useful polygons

Now, since our grid was created using the extent of our input polygon (continental USA), we need to filter out the polygons that do not intersect with it.

Doing this in Python is not the most efficient way to do things, but since it's a step that shouldn't be done over and over, it's not that critical.

If you ever need to do this step efficiently because the data is too big or too complex, it would be better to use QGIS, PyQGIS, GDAL or 
some other more efficient tool for this operation. 

In [11]:
# Select the grid cells by their location relative to the USA polygon,
# using the default predicate of `select_polygons_by_location` and 4 workers
print("Starting intersect selection")
intersecting_polygons = select_polygons_by_location(grid_5km, usa_polygon, num_of_workers=4)

# Optionally save to a new file
intersecting_polygons_filename = DATA_DIR / "intersecting_polygon_grid_5km.gpkg"
to_geopackage(intersecting_polygons, intersecting_polygons_filename)

PosixPath('/home/francispelletier/projects/geospatial_tools/data/intersecting_polygon_grid_5km.gpkg')

In [12]:
# Inspect the grid cells kept by the location selection
intersecting_polygons

Unnamed: 0,geometry,feature_id
0,"POLYGON ((1513886.257 301469.316, 1518886.257 ...",eac0413f-10d9-411b-ba46-0e80c1c6fac4
1,"POLYGON ((1518886.257 301469.316, 1523886.257 ...",867e9b5a-9ad3-41c2-80a0-a09a7938ec1d
2,"POLYGON ((1523886.257 301469.316, 1528886.257 ...",d4878be3-7e12-41dd-aa00-66c1122e1409
3,"POLYGON ((1528886.257 301469.316, 1533886.257 ...",542bae80-ec55-4dd8-9245-2364252f3558
4,"POLYGON ((-146113.743 306469.316, -141113.743 ...",f7e0b384-91b3-4494-8c69-7f328cbcdc29
...,...,...
316126,"POLYGON ((-1966113.743 3161469.316, -1961113.7...",c33d198d-7f77-4103-baa4-c2d5c6b39d0c
316127,"POLYGON ((-1961113.743 3161469.316, -1956113.7...",70b5b7f2-705a-493e-8c9b-0374dbc5a1b5
316128,"POLYGON ((-1956113.743 3161469.316, -1951113.7...",ef96e314-449f-4fe2-977f-1eb8ae374469
316129,"POLYGON ((-1951113.743 3161469.316, -1946113.7...",6977cee1-a730-497c-8815-1f9bf4f82eaf


### Visualizing the selected polygons

This will take more or less time, depending on the number of polygons.

(Do not try this with a grid size smaller than 10000m)

In [13]:
# Rendering this layer takes a few minutes and map navigation will be slow.
# The map starts zoomed in to help with processing.

m_intersecting_polygons = leafmap.Map(center=[39.7, -123], zoom=10)
m_intersecting_polygons.add_gdf(
    intersecting_polygons,
    layer_name="intersecting_polygons",
    style={"color": "blue"},
)
m_intersecting_polygons

Map(center=[39.7, -123], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out…

## Data processing pipeline prototype

### Finding the best image for each S2 tiling grid

In [14]:
# Full list of S2 tile identifiers, taken from the `name` column of the S2 grid layer
s2_tile_grid_list = s2_grid["name"].to_list()
s2_tile_grid_list

['12TUP',
 '12TYQ',
 '12TYR',
 '12TYN',
 '12TYP',
 '12TYS',
 '12TYT',
 '11SMB',
 '11SMC',
 '11SLV',
 '11SMA',
 '11SMS',
 '12UUV',
 '11SMT',
 '11SMD',
 '11SMR',
 '12UUU',
 '11SNA',
 '12TWS',
 '11SNB',
 '12TWT',
 '11SMU',
 '12TWQ',
 '11SMV',
 '12TWR',
 '12TXM',
 '11SNS',
 '12TXN',
 '11SNC',
 '12TXK',
 '11SND',
 '12TXL',
 '11SKD',
 '12TXR',
 '12TXS',
 '11SKB',
 '12TXP',
 '11SKC',
 '12TXQ',
 '11SKU',
 '12TYL',
 '11SKV',
 '12TYM',
 '12TXT',
 '11SKT',
 '12TYK',
 '11SLC',
 '11SLD',
 '11SLA',
 '11SLB',
 '11SLT',
 '11SLU',
 '19TBF',
 '19TBG',
 '19TDM',
 '19TDN',
 '19TEJ',
 '19TEK',
 '19TEN',
 '19TEL',
 '19TEM',
 '18STE',
 '18STF',
 '19TCG',
 '18STC',
 '19TCH',
 '18STD',
 '18STJ',
 '19TCF',
 '19TCL',
 '18STG',
 '19TCM',
 '18STH',
 '19TCJ',
 '19TCK',
 '19TDF',
 '19TDG',
 '19TDK',
 '19TDL',
 '19TDJ',
 '16TCQ',
 '16TCR',
 '16TCN',
 '16TCP',
 '16TDK',
 '16TDL',
 '16TCS',
 '16TCT',
 '16TDP',
 '15RWQ',
 '16TDQ',
 '16TDM',
 '15RXN',
 '16TDN',
 '15RXP',
 '16TDT',
 '16TEK',
 '16TDR',
 '16TDS',
 '16SGB',


In [15]:
# The full tile list is long, so the rest of this notebook works on this small subset
s2_tile_grid_subset_list = [
    "10TDK",
    "10TEK",
    "10SEJ",
    "10SDJ",
]

### Building our processing list

First, let's create a subselection of our dataset

In [16]:
# Re-read the Sentinel 2 tiling grid from disk.
# NOTE(review): this repeats the definition of S2_USA_GRID_FILE and the load of `s2_grid`
# done earlier in the notebook — presumably so this section can be run on its own; confirm
# whether the duplicate read is still needed.
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"
s2_grid = gpd.read_file(S2_USA_GRID_FILE)
s2_grid

Unnamed: 0,name,folders,description,geometry
0,12TUP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1386334.944 2487548.77 0, -..."
1,12TYQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-976300.478 2523767.452 0, -..."
2,12TYR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-960099.705 2622374.255 0, -..."
3,12TYN,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1008622.024 2325748.358 0, ..."
4,12TYP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-992478.385 2424861.34 0, -8..."
...,...,...,...,...
977,12TTM,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1515431.586 2304192.826 0, ..."
978,12TUK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1448525.813 2089886.667 0, ..."
979,12TUQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1371006.917 2586590.133 0, ..."
980,12TUR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-1355793.563 2685354.08 0, -..."


In [17]:
# Creating our S2 grid tile subset: keep only the rows whose tile name is in the subset list

S2_USA_GRID_SUBSET_FILE = DATA_DIR / "s2_grid_usa_polygon_5070_subset.gpkg"
s2_grid_subset = s2_grid.loc[s2_grid["name"].isin(s2_tile_grid_subset_list)]

# Optionally save to geopackage
to_geopackage(gdf=s2_grid_subset, filename=S2_USA_GRID_SUBSET_FILE)

PosixPath('/home/francispelletier/projects/geospatial_tools/data/s2_grid_usa_polygon_5070_subset.gpkg')

In [18]:
# Inspect the S2 tiles retained for the subset
s2_grid_subset

Unnamed: 0,name,folders,description,geometry
823,10SEJ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-2262059.689 2182514.584 0, ..."
841,10SDJ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-2357236.175 2210256.163 0, ..."
846,10TDK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-2329019.777 2307113.875 0, ..."
857,10TEK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((-2233778.019 2279366.081 0, ..."


In [19]:
# Creating our polygon grid subset: select the grid cells with the "within" predicate
# against the S2 tile subset.

intersect_polygons_subset_filename = DATA_DIR / "intersecting_polygon_grid_5km_subset.gpkg"
print("Starting intersect selection")
intersect_polygons_subset = select_polygons_by_location(
    intersecting_polygons,
    s2_grid_subset,
    predicate="within",
    num_of_workers=4,
)
# Note: a result saved by a previous run can be reloaded from
# `intersect_polygons_subset_filename` with `gpd.read_file` instead of recomputing it.

# Optionally save to a new file
to_geopackage(intersect_polygons_subset, intersect_polygons_subset_filename)

PosixPath('/home/francispelletier/projects/geospatial_tools/data/intersecting_polygon_grid_5km_subset.gpkg')

In [20]:
# Inspect the grid cells selected for the S2 tile subset
intersect_polygons_subset

Unnamed: 0,geometry,feature_id
0,"POLYGON ((-2206113.743 2051469.316, -2201113.7...",b384c6a4-a7d7-47ce-b8ae-cd49ef710670
1,"POLYGON ((-2201113.743 2051469.316, -2196113.7...",f9a2d0fb-aa1e-4408-95bb-46af2487cc23
2,"POLYGON ((-2196113.743 2051469.316, -2191113.7...",9a1e7487-84e0-4b7b-8722-50f1083d5a60
3,"POLYGON ((-2221113.743 2056469.316, -2216113.7...",8ec76086-0ab9-4297-91a4-47031bb19147
4,"POLYGON ((-2216113.743 2056469.316, -2211113.7...",08158e08-ecd3-4732-b89a-10b4921ec99e
...,...,...
1571,"POLYGON ((-2306113.743 2291469.316, -2301113.7...",5ec30cc1-dcf7-4f80-8a62-e6e762430f3d
1572,"POLYGON ((-2301113.743 2291469.316, -2296113.7...",f95e2a4e-bf3b-421d-828a-1648d4a7e204
1573,"POLYGON ((-2326113.743 2296469.316, -2321113.7...",8b71a67b-e50a-4a54-9db8-d88c4b41bdd4
1574,"POLYGON ((-2321113.743 2296469.316, -2316113.7...",986a07f6-6be5-4832-abe7-910f7302a47b


### Finding the best products for our subset use case

In [21]:
# Column of `s2_grid_subset` that holds the id of each S2 tile
s2_feature_name_columns = "name"
# Column in which the best results will be stored
vector_column_name = "s2_tiles"

# Initiating our client
best_products_client = BestProductsForFeatures(
    sentinel2_tiling_grid=s2_grid_subset,
    sentinel2_tiling_grid_column=s2_feature_name_columns,
    vector_features=intersect_polygons_subset,
    vector_features_column=vector_column_name,
    max_cloud_cover=15,
)

In [22]:
# Executing the search
#
# This search looks only for complete products, meaning products with less than
# 5 percent of nodata.

# Year and month bounds handed to `create_date_ranges` before searching
start_year = 2023
end_year = 2024
start_month = 6
end_month = 7

best_products_client.create_date_ranges(start_year, end_year, start_month, end_month)
products = best_products_client.find_best_complete_products()
products

{'10SEJ': {'id': 'S2B_MSIL2A_20230713T184939_R113_T10SEJ_20241016T032119',
  'cloud_cover': 4.3e-05,
  'no_data': 0.280069},
 '10SDJ': {'id': 'S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205',
  'cloud_cover': 0.004333,
  'no_data': 0.0},
 '10TDK': {'id': 'S2A_MSIL2A_20240705T185921_R013_T10TDK_20240706T050412',
  'cloud_cover': 0.000707,
  'no_data': 1e-05}}

In [23]:
# Selecting the best products for each vector tile
# This step is necessary as some of our vector polygons can be within multiple S2 tiles.
# The best available S2 tile is therefore selected for each vector polygon.

best_results_path = DATA_DIR / "vector_tiles_with_s2tiles_subset.gpkg"
best_results = best_products_client.select_best_products_per_feature()
# Persist the result; the path returned by to_geopackage is shown as the cell output
to_geopackage(best_results, best_results_path)

PosixPath('/home/francispelletier/projects/geospatial_tools/data/vector_tiles_with_s2tiles_subset.gpkg')

In [24]:
# Inspect the vector polygons with their S2 tiles and selected product id
best_results

Unnamed: 0,geometry,feature_id,s2_tiles,best_s2_product_id
0,"POLYGON ((-2206113.743 2051469.316, -2201113.7...",b384c6a4-a7d7-47ce-b8ae-cd49ef710670,[10SEJ],S2B_MSIL2A_20230713T184939_R113_T10SEJ_2024101...
1,"POLYGON ((-2201113.743 2051469.316, -2196113.7...",f9a2d0fb-aa1e-4408-95bb-46af2487cc23,[10SEJ],S2B_MSIL2A_20230713T184939_R113_T10SEJ_2024101...
2,"POLYGON ((-2196113.743 2051469.316, -2191113.7...",9a1e7487-84e0-4b7b-8722-50f1083d5a60,[10SEJ],S2B_MSIL2A_20230713T184939_R113_T10SEJ_2024101...
3,"POLYGON ((-2221113.743 2056469.316, -2216113.7...",8ec76086-0ab9-4297-91a4-47031bb19147,[10SEJ],S2B_MSIL2A_20230713T184939_R113_T10SEJ_2024101...
4,"POLYGON ((-2216113.743 2056469.316, -2211113.7...",08158e08-ecd3-4732-b89a-10b4921ec99e,[10SEJ],S2B_MSIL2A_20230713T184939_R113_T10SEJ_2024101...
...,...,...,...,...
1515,"POLYGON ((-2306113.743 2291469.316, -2301113.7...",5ec30cc1-dcf7-4f80-8a62-e6e762430f3d,[10TDK],S2A_MSIL2A_20240705T185921_R013_T10TDK_2024070...
1516,"POLYGON ((-2301113.743 2291469.316, -2296113.7...",f95e2a4e-bf3b-421d-828a-1648d4a7e204,[10TDK],S2A_MSIL2A_20240705T185921_R013_T10TDK_2024070...
1517,"POLYGON ((-2326113.743 2296469.316, -2321113.7...",8b71a67b-e50a-4a54-9db8-d88c4b41bdd4,[10TDK],S2A_MSIL2A_20240705T185921_R013_T10TDK_2024070...
1518,"POLYGON ((-2321113.743 2296469.316, -2316113.7...",986a07f6-6be5-4832-abe7-910f7302a47b,[10TDK],S2A_MSIL2A_20240705T185921_R013_T10TDK_2024070...


### Visualizing the results


In [25]:
# S2 tiles for which the search returned a product, with its id, cloud cover and nodata values
best_products_client.successful_results

{'10SEJ': {'id': 'S2B_MSIL2A_20230713T184939_R113_T10SEJ_20241016T032119',
  'cloud_cover': 4.3e-05,
  'no_data': 0.280069},
 '10SDJ': {'id': 'S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205',
  'cloud_cover': 0.004333,
  'no_data': 0.0},
 '10TDK': {'id': 'S2A_MSIL2A_20240705T185921_R013_T10TDK_20240706T050412',
  'cloud_cover': 0.000707,
  'no_data': 1e-05}}

In [26]:
# We do, however, have one S2 grid tile missing.
# No complete products were found for that tile.
# Therefore, it will have to be processed separately later,
# by mosaicking different products together.
#
# The consequence of this is that all vector polygons that are
# within this tile will not be processed at this time.

best_products_client.incomplete_results

['10TEK']

In [27]:
# Map of the S2 tile subset (red) overlaid with the vector polygons that received a product (green)
m_best_results = leafmap.Map(center=[39.7, -123], zoom=8)
m_best_results.add_gdf(s2_grid_subset, layer_name="s2_tiles", style={"color": "red"})
m_best_results.add_gdf(
    best_results,
    layer_name="vector_tiles_s2_grid",
    style={"color": "green"},
)
m_best_results

Map(center=[39.7, -123], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out…

In [28]:
# One row per selected product, holding the list of feature ids assigned to it
group_by_product = (
    best_results
    .groupby("best_s2_product_id")["feature_id"]
    .agg(list)
    .reset_index()
)
group_by_product

Unnamed: 0,best_s2_product_id,feature_id
0,S2A_MSIL2A_20230721T185921_R013_T10SDJ_2024091...,"[af641811-b4d7-4666-acbe-16207c4ded34, b47a8ab..."
1,S2A_MSIL2A_20240705T185921_R013_T10TDK_2024070...,"[3e70880c-8393-489a-b8bf-7fa3024c57b5, 5d0f6f5..."
2,S2B_MSIL2A_20230713T184939_R113_T10SEJ_2024101...,"[b384c6a4-a7d7-47ce-b8ae-cd49ef710670, f9a2d0f..."


In [29]:
# Plain Python list of the selected product ids
product_list = group_by_product["best_s2_product_id"].to_list()
product_list

['S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205',
 'S2A_MSIL2A_20240705T185921_R013_T10TDK_20240706T050412',
 'S2B_MSIL2A_20230713T184939_R113_T10SEJ_20241016T032119']

In [30]:
# Use the first product of the list as the worked example for the rest of the notebook
product_example_id = product_list[0]
product_example_id

'S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205'

### Downloading and processing Sentinel 2 products

#### Downloading and preparing Sentinel 2 products

In [31]:
bands = ["B02", "B03", "B04", "B08", "visual"]
download_directory = DATA_DIR / "example_s2_download_and_processing"

# Download and process every selected product, collecting the returned assets.
# `target_projection=5070` presumably refers to the EPSG:5070 CRS used for the vector layers.
product_asset_list = [
    download_and_process_sentinel2_asset(
        product_id=product_id,
        product_bands=bands,
        base_directory=download_directory,
        target_projection=5070,
    )
    for product_id in product_list
]

[2026-01-29 16:51:35] INFO       [MainThread][geospatial_tools.planetary_computer.sentinel_2] Reprojected file [/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/S2B_MSIL2A_20230713T184939_R113_T10SEJ_20241016T032119_reprojected.tif] already exists


In [32]:
# Summarize each processed asset: its id and the path of its reprojected file
for asset in product_asset_list:
    print(f"Asset ID : [{asset.asset_id}]")
    print(f"Reprojected ID path : \n[{asset.reprojected_asset_path}]\n")


Asset ID : [S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205]
Reprojected ID path : 
[/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205_reprojected.tif]

Asset ID : [S2A_MSIL2A_20240705T185921_R013_T10TDK_20240706T050412]
Reprojected ID path : 
[/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/S2A_MSIL2A_20240705T185921_R013_T10TDK_20240706T050412_reprojected.tif]

Asset ID : [S2B_MSIL2A_20230713T184939_R113_T10SEJ_20241016T032119]
Reprojected ID path : 
[/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/S2B_MSIL2A_20230713T184939_R113_T10SEJ_20241016T032119_reprojected.tif]



In [33]:
# For convenience, rebuild an Asset for the example product by hand, pointing it at the
# reprojected file whose path was printed above.

reprojected_example_path = download_directory / f"{product_example_id}_reprojected.tif"
product = Asset(asset_id=product_example_id, reprojected_asset=reprojected_example_path)

#### Creating a new geodataframe of all the vector polygons that are within our selected product

In [34]:
s2_product_id = product.asset_id
product_path = product.reprojected_asset_path

# Look up the vector polygons that were assigned to this product.
product_id_series = group_by_product[group_by_product["best_s2_product_id"] == s2_product_id]
# Since it's grouped by product id, there should always be only one matching row
feature_ids = product_id_series["feature_id"].iloc[0]
vector_features = best_results[best_results["feature_id"].isin(feature_ids)]

# Save the selection once, through the path variable that the clipping step also uses
vector_features_path = DATA_DIR / "vector_features.gpkg"
to_geopackage(vector_features, vector_features_path)

# Bare expression so the notebook renders the frame with its rich display
vector_features

                                              geometry  \
75   POLYGON ((-2311113.743 2081469.316, -2306113.7...   
76   POLYGON ((-2306113.743 2081469.316, -2301113.7...   
77   POLYGON ((-2301113.743 2081469.316, -2296113.7...   
78   POLYGON ((-2296113.743 2081469.316, -2291113.7...   
79   POLYGON ((-2291113.743 2081469.316, -2286113.7...   
..                                                 ...   
814  POLYGON ((-2331113.743 2186469.316, -2326113.7...   
815  POLYGON ((-2326113.743 2186469.316, -2321113.7...   
816  POLYGON ((-2321113.743 2186469.316, -2316113.7...   
817  POLYGON ((-2316113.743 2186469.316, -2311113.7...   
850  POLYGON ((-2331113.743 2191469.316, -2326113.7...   

                               feature_id s2_tiles  \
75   af641811-b4d7-4666-acbe-16207c4ded34  [10SDJ]   
76   b47a8abb-db5a-40c1-abff-4def8a2638b7  [10SDJ]   
77   f41cf1b1-a85d-4d12-be54-3dc4f486914d  [10SDJ]   
78   adf62eac-bc81-4709-a792-6b0f69a4ed53  [10SDJ]   
79   0eb28f2a-7b66-4a62-a513-1d45

#### Creating our Sentinel 2 "chips" by clipping main products with our vector polygon grid

In [35]:
# Clip the reprojected product with the saved vector polygon layer.
# The list of clipped file paths returned by the call is shown as the cell output.
clip_raster_with_polygon(
    raster_image=product_path,
    polygon_layer=vector_features_path,
    base_output_filename=s2_product_id,
    output_dir=download_directory / "test_sentinel2_clip",
)

[PosixPath('/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/test_sentinel2_clip/S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205_clipped_5.tif'),
 PosixPath('/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/test_sentinel2_clip/S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205_clipped_6.tif'),
 PosixPath('/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/test_sentinel2_clip/S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205_clipped_8.tif'),
 PosixPath('/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/test_sentinel2_clip/S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205_clipped_11.tif'),
 PosixPath('/home/francispelletier/projects/geospatial_tools/data/example_s2_download_and_processing/test_sentinel2_clip/S2A_MSIL2A_20230721T185921_R013_T10SDJ_20240911T103205_clipped_10.tif'),
 PosixPath('/home/francispelletie