In [1]:
import leafmap
import geopandas as gpd
from geospatial_tools import DATA_DIR
from geospatial_tools.utils import get_yaml_config, download_url, unzip_file


## Base data

The USA polygon is base off 2018's `cb_2018_us_nation_5m` shapefile, taken from here: 
https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html

It was then processed using QGIS to keep only the contiguous states, without any islands.

The Sentinel 2 grid was taken from the kml file found here: 
https://sentiwiki.copernicus.eu/web/s2-products

Below is some code to help with the download part.

### Downloading data
Let's download our source data

In [2]:
file_configs = get_yaml_config("data_file_links")
raw_usa_polygon_path = file_configs["united_states_polygon"]["url"]
raw_s2_tiling_grid_path = file_configs["sentinel_2_tiling_grid"]["url"]
download_list = {"raw_usa_polygon" : raw_usa_polygon_path, "raw_s2_tiling_grid" : raw_s2_tiling_grid_path}
file_list = [download_url(url=url, filename=f"{DATA_DIR}/{key}.zip") for key,url in download_list.items()]

file_list



[2024-08-20 17:48:22] INFO       [MainThread][geospatial_tools.utils] Yaml config file [/home/francispelletier/projects/geospatial_tools/configs/data_file_links.yaml] found.
[2024-08-20 17:48:22] INFO       [MainThread][geospatial_tools.utils] Loading YAML config file [/home/francispelletier/projects/geospatial_tools/configs/data_file_links.yaml].
[2024-08-20 17:48:22] INFO       [MainThread][geospatial_tools.utils] Downloaded /home/francispelletier/projects/geospatial_tools/data/raw_usa_polygon.zip successfully.
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Downloaded /home/francispelletier/projects/geospatial_tools/data/raw_s2_tiling_grid.zip successfully.


['/home/francispelletier/projects/geospatial_tools/data/raw_usa_polygon.zip',
 '/home/francispelletier/projects/geospatial_tools/data/raw_s2_tiling_grid.zip']

In [3]:
[unzip_file(zip_path=f, extract_to=DATA_DIR) for f in file_list]

[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.shp.ea.iso.xml]
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.shp.iso.xml]
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.shp]
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.shx]
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.dbf]
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.prj]
[2024-08-20 17:48:23] INFO       [MainThread][geospatial_tools.utils] Extracted: [cb_2018_us_nation_20m.cpg]
[2024-08-20 17:48:24] INFO       [MainThread][geospatial_tools.utils] Extracted: [S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml]


[None, None]

### Initial pre-processing

The above layers were processed using QGIS.

For the purpose of this analysis, only the contiguous lower 48 states have been conserved; smaller islands/land masses 
have also been striped.

The S2 tiling grid has been trimmed to keep only the grid cells that overlap with the 
contiguous states.

Since our area of study is quite large, the `EPSG:5070` projection was chosen, as it
covers the whole area, introduces minimal distortion while preserving area.

The files below have also been saved in this repository.

In [4]:
USA_POLYGON_FILE = DATA_DIR / "usa_polygon_5070.gpkg"
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"
usa_polygon = gpd.read_file(USA_POLYGON_FILE)
s2_grid = gpd.read_file(S2_USA_GRID_FILE)

In [5]:
usa_polygon

Unnamed: 0,AFFGEOID,GEOID,NAME,geometry
0,0100000US,US,United States,"MULTIPOLYGON (((-2116048.733 3142966.552, -211..."


In [6]:
s2_grid

Unnamed: 0,name,folders,description,geometry
0,12TUP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1386334.944 2487548.770 0.0...
1,12TYQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-976300.478 2523767.452 0.00...
2,12TYR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-960099.705 2622374.255 0.00...
3,12TYN,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1008622.024 2325748.358 0.0...
4,12TYP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-992478.385 2424861.340 0.00...
...,...,...,...,...
977,12TTM,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1515431.586 2304192.826 0.0...
978,12TUK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1448525.813 2089886.667 0.0...
979,12TUQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1371006.917 2586590.133 0.0...
980,12TUR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1355793.563 2685354.080 0.0...


In [7]:
m = leafmap.Map(center=[40, -98], zoom=4)

# In blue, the USA polygon
m.add_gdf(usa_polygon, layer_name='usa')
# In red, the Sentinel 2 grid
m.add_gdf(s2_grid, layer_name='s2_grid', style={"color": "red"})

m

Map(center=[40, -98], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_te…

## Creating our grid

From this, we want to create a grid of square polygons with which we will later on
query the [Planetary Computer](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a)
Sentinel 2 dataset and clip the selected Sentinel 2 images.

For the purpose of this notebook, the grid that will be created will use 10km by 10km squares to speed up 
processing.

In [8]:
from geospatial_tools.vector import create_vector_grid_parallel, to_geopackage, select_polygons_by_location

In [9]:
grid_size = 5000
bbox = usa_polygon.total_bounds
grid_5km_filename = DATA_DIR / "polygon_grid_5km.gpkg"

In [10]:
print("Starting processing for [create_vector_grid_parallel]")
grid_5km = create_vector_grid_parallel(bounding_box=bbox, grid_size=grid_size, crs="EPSG:5070")
print(f"Printing len(grid_parallel) to check if grid contains same amount of polygons : {len(grid_5km)}")
to_geopackage(gdf=grid_5km, filename=grid_5km_filename)
grid_5km

Starting processing for [create_vector_grid_parallel]
[2024-08-20 17:48:24] INFO       [MainThread][geospatial_tools.vector] Creating grid coordinates for bounding box [[-2356113.74289801   301469.31619713  2258154.44089948  3165721.6501298 ]]
[2024-08-20 17:48:24] INFO       [MainThread][geospatial_tools.vector] Creating flattened grid coordinates
[2024-08-20 17:48:25] INFO       [MainThread][geospatial_tools.vector] Number of workers used: 16
[2024-08-20 17:48:25] INFO       [MainThread][geospatial_tools.vector] Allocating polygon array for [528879] polygons
[2024-08-20 17:48:25] INFO       [MainThread][geospatial_tools.vector] Creating polygons from chunks
Printing len(grid_parallel) to check if grid contains same amount of polygons : 528879
[2024-08-20 17:48:30] INFO       [MainThread][geospatial_tools.vector] Starting writing process
[2024-08-20 17:48:32] INFO       [MainThread][geospatial_tools.vector] File [/home/francispelletier/projects/geospatial_tools/data/polygon_grid_5km.g

Unnamed: 0,geometry
0,"POLYGON ((-2356113.743 301469.316, -2351113.74..."
1,"POLYGON ((-2351113.743 301469.316, -2346113.74..."
2,"POLYGON ((-2346113.743 301469.316, -2341113.74..."
3,"POLYGON ((-2341113.743 301469.316, -2336113.74..."
4,"POLYGON ((-2336113.743 301469.316, -2331113.74..."
...,...
528874,"POLYGON ((2233886.257 3161469.316, 2238886.257..."
528875,"POLYGON ((2238886.257 3161469.316, 2243886.257..."
528876,"POLYGON ((2243886.257 3161469.316, 2248886.257..."
528877,"POLYGON ((2248886.257 3161469.316, 2253886.257..."


### Selecting the useful polygons

Now, since our grid was created using the extent of our input polygon (continental USA), we need to filter out the polygons that do not intersect with it.

Doing this in Python is not the most efficient way to do things, but since it's a step that shouldn't be done over and over, it's not that critical.

If ever you need to do this step in an efficient way because the data is just too big or too complex, it would be better off going through QGIS, PyGQIS, GDAL or 
some other more efficient way to do this operation. 

In [11]:
intersecting_polygons_filename = DATA_DIR / "intersecting_polygon_grid_5km.gpkg"
print("Starting intersect selection")
intersecting_polygons = select_polygons_by_location(grid_5km, usa_polygon)

# Optionally save to a new file
to_geopackage(intersecting_polygons, intersecting_polygons_filename)

Starting intersect selection
[2024-08-20 17:48:32] INFO       [MainThread][geospatial_tools.vector] Number of workers used: 16
[2024-08-20 17:48:41] INFO       [MainThread][geospatial_tools.vector] Starting writing process
[2024-08-20 17:48:42] INFO       [MainThread][geospatial_tools.vector] File [/home/francispelletier/projects/geospatial_tools/data/intersecting_polygon_grid_5km.gpkg] took 1.0421457290649414 seconds to write.


PosixPath('/home/francispelletier/projects/geospatial_tools/data/intersecting_polygon_grid_5km.gpkg')

### Visualizing the selected polygons

This will take more or less time, depending on the number on polygons.

(Do not try this with a grid size smaller than 10000m)

In [12]:
# This takes a few minutes and navigation will be slow
# m_intersecting_polygons = leafmap.Map(center=[40, -98], zoom=4)
# m_intersecting_polygons.add_gdf(intersecting_polygons, layer_name='intersecting_polygons', style={"color": "blue"})
# m_intersecting_polygons

## Making of list of all the S2 tiling grids we will be using

Now, we need to build our S2 tile grid list

In [13]:
s2_tile_grid_list = s2_grid["name"].to_list()
s2_tile_grid_list

['12TUP',
 '12TYQ',
 '12TYR',
 '12TYN',
 '12TYP',
 '12TYS',
 '12TYT',
 '11SMB',
 '11SMC',
 '11SLV',
 '11SMA',
 '11SMS',
 '12UUV',
 '11SMT',
 '11SMD',
 '11SMR',
 '12UUU',
 '11SNA',
 '12TWS',
 '11SNB',
 '12TWT',
 '11SMU',
 '12TWQ',
 '11SMV',
 '12TWR',
 '12TXM',
 '11SNS',
 '12TXN',
 '11SNC',
 '12TXK',
 '11SND',
 '12TXL',
 '11SKD',
 '12TXR',
 '12TXS',
 '11SKB',
 '12TXP',
 '11SKC',
 '12TXQ',
 '11SKU',
 '12TYL',
 '11SKV',
 '12TYM',
 '12TXT',
 '11SKT',
 '12TYK',
 '11SLC',
 '11SLD',
 '11SLA',
 '11SLB',
 '11SLT',
 '11SLU',
 '19TBF',
 '19TBG',
 '19TDM',
 '19TDN',
 '19TEJ',
 '19TEK',
 '19TEN',
 '19TEL',
 '19TEM',
 '18STE',
 '18STF',
 '19TCG',
 '18STC',
 '19TCH',
 '18STD',
 '18STJ',
 '19TCF',
 '19TCL',
 '18STG',
 '19TCM',
 '18STH',
 '19TCJ',
 '19TCK',
 '19TDF',
 '19TDG',
 '19TDK',
 '19TDL',
 '19TDJ',
 '16TCQ',
 '16TCR',
 '16TCN',
 '16TCP',
 '16TDK',
 '16TDL',
 '16TCS',
 '16TCT',
 '16TDP',
 '15RWQ',
 '16TDQ',
 '16TDM',
 '15RXN',
 '16TDN',
 '15RXP',
 '16TDT',
 '16TEK',
 '16TDR',
 '16TDS',
 '16SGB',


## Exploring S2 STAC catalog tools

In [14]:
from pathlib import Path
from geospatial_tools import DATA_DIR
from geospatial_tools.stac import StacSearch, PLANETARY_COMPUTER
from geospatial_tools.utils import create_date_range_for_specific_period

In [15]:

start_year = 2023
end_year = 2024
start_month = 6
end_month = 7
date_ranges = create_date_range_for_specific_period(start_year=start_year, end_year=end_year,
                                                    start_month_range=start_month, end_month_range=end_month)

search_client = StacSearch(PLANETARY_COMPUTER)

collection = "sentinel-2-l2a"
tile_ids = ["10SDJ"]
query = {"eo:cloud_cover": {"lt": 1}, "s2:mgrs_tile": {"in": tile_ids}}
sortby = [{"field": "properties.eo:cloud_cover", "direction": "asc"}]

results = search_client.stac_api_search_for_date_ranges(date_ranges=date_ranges, collections=collection, query=query,
                                                        sortby=sortby, max_items=20)

sorted_items = search_client.sort_results_by_cloud_coverage()
optimal_result = sorted_items[0]

for item in sorted_items:
    print(f"Item: {item.id}, {item.datetime}, {item.properties['eo:cloud_cover']}")
print(f"Optimal result: {optimal_result.id}, {optimal_result.datetime}, {optimal_result.properties['eo:cloud_cover']}")

[2024-08-20 17:48:43] INFO       [MainThread][geospatial_tools.stac] Initiating STAC API search for the following date ranges : [['2023-06-01T00:00:00Z/2023-07-31T23:59:59Z', '2024-06-01T00:00:00Z/2024-07-31T23:59:59Z'] 
	Query : [{'eo:cloud_cover': {'lt': 1}, 's2:mgrs_tile': {'in': ['10SDJ']}}]
[2024-08-20 17:48:43] INFO       [MainThread][geospatial_tools.stac] Search successful for date range [2023-06-01T00:00:00Z/2023-07-31T23:59:59Z]
[2024-08-20 17:48:44] INFO       [MainThread][geospatial_tools.stac] Search successful for date range [2024-06-01T00:00:00Z/2024-07-31T23:59:59Z]
[2024-08-20 17:48:44] INFO       [MainThread][geospatial_tools.stac] Sorting results by cloud cover (from least to most)
Item: S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744, 2024-07-12 18:49:31.024000+00:00, 5.8e-05
Item: S2A_MSIL2A_20240722T184921_R113_T10SDJ_20240723T033105, 2024-07-22 18:49:21.024000+00:00, 0.000137
Item: S2A_MSIL2A_20240612T184921_R113_T10SDJ_20240613T042051, 2024-06-12 18:49:21

In [16]:
bands = ["B02", "B03", "B04", "B08", "visual"]
file_base_path = Path(f"{DATA_DIR}/sentinel-2/")
best_result = search_client.download_best_cloud_cover_results(bands=bands, base_directory=file_base_path)
best_result

[2024-08-20 17:48:44] INFO       [MainThread][geospatial_tools.stac] Downloading [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744] ...
[2024-08-20 17:48:44] INFO       [MainThread][geospatial_tools.stac] Downloading B02 from https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/10/S/DJ/2024/07/12/S2A_MSIL2A_20240712T184931_N0510_R113_T10SDJ_20240713T042744.SAFE/GRANULE/L2A_T10SDJ_A047300_20240712T185252/IMG_DATA/R10m/T10SDJ_20240712T184931_B02_10m.tif?st=2024-08-19T21%3A48%3A43Z&se=2024-08-20T22%3A33%3A43Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-08-20T16%3A26%3A29Z&ske=2024-08-27T16%3A26%3A29Z&sks=b&skv=2024-05-04&sig=LkjROcbPGIGYWAxE7l9nRuZw9WRL%2B44KNqXZMdzPgmg%3D
[2024-08-20 17:48:49] INFO       [MainThread][geospatial_tools.utils] Downloaded /home/francispelletier/projects/geospatial_tools/data/sentinel-2/S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_B02.tif successfully.
[2024-08-20

<geospatial_tools.stac.Asset at 0x7a9ebd9c8cd0>

In [17]:
best_result.show_asset_items()

[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] Asset list for asset [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744] : 
	['ID: [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744], Band: [B02], filename: [/home/francispelletier/projects/geospatial_tools/data/sentinel-2/S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_B02.tif]', 'ID: [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744], Band: [B03], filename: [/home/francispelletier/projects/geospatial_tools/data/sentinel-2/S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_B03.tif]', 'ID: [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744], Band: [B04], filename: [/home/francispelletier/projects/geospatial_tools/data/sentinel-2/S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_B04.tif]', 'ID: [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744], Band: [B08], filename: [/home/francispelletier/projects/geospatial_tools/data/sentinel-2/S2A_MSIL2A_20240712T184931_R113_T10SDJ_202

In [18]:
merged = best_result.merge_asset(delete_sub_items=True)
merged

[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] Calculated a total of [7] bands
[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] 7
[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] Creating merged asset metadata
[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] Merging asset [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744] ...
[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] Writing band image: S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744
[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] writing asset sub item band 1
[2024-08-20 17:49:15] INFO       [MainThread][geospatial_tools.stac] writing merged index band 1
[2024-08-20 17:49:16] INFO       [MainThread][geospatial_tools.stac] Writing band image: S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744
[2024-08-20 17:49:16] INFO       [MainThread][geospatial_tools.stac] writing asset sub item band 1
[

PosixPath('S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_merged.tif')

In [19]:
reprojected = best_result.reproject_merged_asset(target_projection=5070, delete_merged_asset=True)
reprojected

[2024-08-20 17:49:23] INFO       [MainThread][geospatial_tools.stac] Reprojecting asset [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744] ...
[2024-08-20 17:49:23] INFO       [MainThread][geospatial_tools.stac] Creating EPSG code from following input : [5070]
[2024-08-20 17:49:58] INFO       [MainThread][geospatial_tools.stac] Reprojected file created at S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_reprojected.tif
[2024-08-20 17:49:58] INFO       [MainThread][geospatial_tools.stac] Asset location : [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_reprojected.tif]
[2024-08-20 17:49:58] INFO       [MainThread][geospatial_tools.stac] Deleting merged asset file for [S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_merged.tif]


PosixPath('S2A_MSIL2A_20240712T184931_R113_T10SDJ_20240713T042744_reprojected.tif')

# Finding the best image for each S2 tiling grid

### Subselection
This is a long list, so we'll be continuing this notebook with the following subset

In [20]:
from geospatial_tools.utils import create_date_range_for_specific_period
from geospatial_tools.planetary_computer.sentinel_2 import find_best_image_per_s2_tile, write_results_to_file

In [21]:
s2_tile_grid_subset_list = ["10TDK", "10TEK", "10SDJ", "10SEJ"]

In [22]:
s2_tile_grid_list = s2_tile_grid_subset_list

start_year = 2020
end_year = 2024
start_month = 6
end_month = 7

date_ranges_2020_2024 = create_date_range_for_specific_period(start_year=start_year, end_year=end_year,
                                                              start_month_range=start_month,
                                                              end_month_range=end_month)
max_cloud_cover = 15

tile_dictionary, error_list = find_best_image_per_s2_tile(date_ranges=date_ranges_2020_2024,
                                                          cloud_cover=max_cloud_cover,
                                                          s2_tile_grid_list=s2_tile_grid_list)

results_dict = write_results_to_file(cloud_cover=max_cloud_cover, tile_dictionary=tile_dictionary, error_list=error_list)
results_dict

[2024-08-20 17:49:59] INFO       [ThreadPoolExecutor-0_0][geospatial_tools.stac] Initiating STAC API search for the following date ranges : [['2020-06-01T00:00:00Z/2020-07-31T23:59:59Z', '2021-06-01T00:00:00Z/2021-07-31T23:59:59Z', '2022-06-01T00:00:00Z/2022-07-31T23:59:59Z', '2023-06-01T00:00:00Z/2023-07-31T23:59:59Z', '2024-06-01T00:00:00Z/2024-07-31T23:59:59Z'] 
	Query : [{'eo:cloud_cover': {'lt': 15}, 's2:mgrs_tile': {'in': ['10TDK']}}]
[2024-08-20 17:49:59] INFO       [ThreadPoolExecutor-0_2][geospatial_tools.stac] Initiating STAC API search for the following date ranges : [['2020-06-01T00:00:00Z/2020-07-31T23:59:59Z', '2021-06-01T00:00:00Z/2021-07-31T23:59:59Z', '2022-06-01T00:00:00Z/2022-07-31T23:59:59Z', '2023-06-01T00:00:00Z/2023-07-31T23:59:59Z', '2024-06-01T00:00:00Z/2024-07-31T23:59:59Z'] 
	Query : [{'eo:cloud_cover': {'lt': 15}, 's2:mgrs_tile': {'in': ['10SDJ']}}]
[2024-08-20 17:49:59] INFO       [ThreadPoolExecutor-0_3][geospatial_tools.stac] Initiating STAC API search fo

{'tile_filename': PosixPath('/home/francispelletier/projects/geospatial_tools/data/data_lt{cloud_cover}cc.json'),
 'errors_filename': 'None'}

## Data processing pipeline prototype

### Building our processing list

First, let's make create a subselection of our dataset

In [23]:
import geopandas as gpd
from geospatial_tools import DATA_DIR
from geospatial_tools.vector import spatial_join_within

In [24]:
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"
s2_grid = gpd.read_file(S2_USA_GRID_FILE)
s2_grid

Unnamed: 0,name,folders,description,geometry
0,12TUP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1386334.944 2487548.770 0.0...
1,12TYQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-976300.478 2523767.452 0.00...
2,12TYR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-960099.705 2622374.255 0.00...
3,12TYN,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1008622.024 2325748.358 0.0...
4,12TYP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-992478.385 2424861.340 0.00...
...,...,...,...,...
977,12TTM,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1515431.586 2304192.826 0.0...
978,12TUK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1448525.813 2089886.667 0.0...
979,12TUQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1371006.917 2586590.133 0.0...
980,12TUR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-1355793.563 2685354.080 0.0...


In [25]:
s2_grid_subset = s2_grid[s2_grid["name"].isin(s2_tile_grid_subset_list)]
s2_grid_subset

Unnamed: 0,name,folders,description,geometry
823,10SEJ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-2262059.689 2182514.584 0.0...
841,10SDJ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-2357236.175 2210256.163 0.0...
846,10TDK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-2329019.777 2307113.875 0.0...
857,10TEK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,MULTIPOLYGON Z (((-2233778.019 2279366.081 0.0...


In [26]:
intersect_polygons_subset_filename = DATA_DIR / "intersecting_polygon_grid_5000_subset.gpkg"
print("Starting intersect selection")
intersect_polygons_subset = select_polygons_by_location(intersecting_polygons, s2_grid_subset, predicate="within")

# Optionally save to a new file
to_geopackage(intersect_polygons_subset, intersect_polygons_subset_filename)

Starting intersect selection
[2024-08-20 17:50:08] INFO       [MainThread][geospatial_tools.vector] Number of workers used: 16
[2024-08-20 17:50:10] INFO       [MainThread][geospatial_tools.vector] Starting writing process
[2024-08-20 17:50:10] INFO       [MainThread][geospatial_tools.vector] File [/home/francispelletier/projects/geospatial_tools/data/intersecting_polygon_grid_5000_subset.gpkg] took 0.024418115615844727 seconds to write.


PosixPath('/home/francispelletier/projects/geospatial_tools/data/intersecting_polygon_grid_5000_subset.gpkg')

In [40]:
m_subset = leafmap.Map(center=[39.7, -123], zoom=8)
m_subset.add_gdf(s2_grid_subset, layer_name='s2_tiles', style={"color": "red"})
m_subset.add_gdf(intersect_polygons_subset, layer_name='intersecting_polygons', style={"color": "blue"})
m_subset

ValueError: Can't clean for JSON: UUID('c509feaa-637b-4263-a9f9-8c02b4f32159')

In [30]:
s2_feature_name_columns = "name"
vector_column_name = "s2_tiles"
results = spatial_join_within(
        polygon_features=s2_grid_subset,
        polygon_column=s2_feature_name_columns,
        vector_features=intersect_polygons_subset,
        vector_column_name=vector_column_name,
    )
print("Writing vector tiles to file")
to_geopackage(results, DATA_DIR / "vector_tiles_with_s2tiles_subset.gpkg")
results

[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] Creating temporary UUID field for join operations
[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] Starting process to find and identify contained features using spatial 'within' join operation
[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] Grouping results
[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] Cleaning and merging results
[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] Spatial join operation is completed
Writing vector tiles to file
[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] Starting writing process
[2024-08-20 17:52:30] INFO       [MainThread][geospatial_tools.vector] File [/home/francispelletier/projects/geospatial_tools/data/vector_tiles_with_s2tiles_subset.gpkg] took 0.01780533790588379 seconds to write.


Unnamed: 0,geometry,s2_tiles
0,"POLYGON ((-2206113.743 2051469.316, -2201113.7...",[10SEJ]
1,"POLYGON ((-2201113.743 2051469.316, -2196113.7...",[10SEJ]
2,"POLYGON ((-2196113.743 2051469.316, -2191113.7...",[10SEJ]
3,"POLYGON ((-2221113.743 2056469.316, -2216113.7...",[10SEJ]
4,"POLYGON ((-2216113.743 2056469.316, -2211113.7...",[10SEJ]
...,...,...
1571,"POLYGON ((-2306113.743 2291469.316, -2301113.7...",[10TDK]
1572,"POLYGON ((-2301113.743 2291469.316, -2296113.7...",[10TDK]
1573,"POLYGON ((-2326113.743 2296469.316, -2321113.7...",[10TDK]
1574,"POLYGON ((-2321113.743 2296469.316, -2316113.7...",[10TDK]


In [41]:
m_subset_results = leafmap.Map(center=[39.7, -123], zoom=8)
m_subset_results.add_gdf(s2_grid_subset, layer_name='s2_tiles', style={"color": "red"})
m_subset_results.add_gdf(results, layer_name='vector_tiles_s2_grid', style={"color": "green"})
m_subset_results

Map(center=[39.7, -123], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out…