In [5]:
import leafmap
import geopandas as gpd
from geospatial_tools import DATA_DIR

## Base data

The USA polygon is based on the 2018 `cb_2018_us_nation_5m` shapefile, taken from here: 
https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html

It was then processed using QGIS to keep only the contiguous states, without any islands.

The Sentinel 2 grid was taken from the kml file found here: 
https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-2/data-products

It was then processed using QGIS to keep only the grid cells that overlap with the 
contiguous states, meaning the polygon layer which is described just above.

Since our area of study is quite large, the `EPSG:5070` projection was chosen, as it
covers the whole area and preserves area while introducing minimal distortion.

In [3]:
# Pre-processed input layers (GeoPackage files) described in the "Base data" section.
# NOTE(review): the "_5070" suffix suggests both are already stored in EPSG:5070 — confirm.
USA_POLYGON_FILE = DATA_DIR / "usa/usa_polygon_5070.gpkg"
S2_USA_GRID_FILE = DATA_DIR / "usa/s2_grid_usa_polygon_5070.gpkg"

In [27]:
# Load both layers as GeoDataFrames; they are reused by the map and grid cells below.
usa_polygon = gpd.read_file(USA_POLYGON_FILE)
s2_grid = gpd.read_file(S2_USA_GRID_FILE)

In [28]:
# Display the USA layer (a single MULTIPOLYGON row in the recorded output).
usa_polygon

Unnamed: 0,AFFGEOID,GEOID,NAME,geometry
0,0100000US,US,United States,"MULTIPOLYGON (((-2123555.702 3120381.564, -212..."


In [29]:
# Display the Sentinel 2 tile grid; the notebook abbreviates the table (981 rows in the recorded output).
s2_grid

Unnamed: 0,name,folders,description,altitude,alt_mode,time_begin,time_end,time_when,geometry
0,12TUP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-1386334.944 2487548.770 0.0...
1,12TYQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-976300.478 2523767.452 0.00...
2,12TYR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-960099.705 2622374.255 0.00...
3,12TYN,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-1008622.024 2325748.358 0.0...
4,12TYP,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-992478.385 2424861.340 0.00...
...,...,...,...,...,...,...,...,...,...
977,12TTM,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-1515431.586 2304192.826 0.0...
978,12TUK,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-1448525.813 2089886.667 0.0...
979,12TUQ,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-1371006.917 2586590.133 0.0...
980,12TUR,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,MULTIPOLYGON Z (((-1355793.563 2685354.080 0.0...


In [30]:
# Interactive map used to eyeball the two base layers.
m = leafmap.Map(center=[40, -98], zoom=4)

# `add_gdf` takes the layer's display name through `layer_name`; the previous
# `layer=` keyword is not a parameter of `add_gdf`, so the name was not applied.
# In blue, the USA polygon
m.add_gdf(usa_polygon, layer_name='usa')
# In red, the Sentinel 2 grid
m.add_gdf(s2_grid, layer_name='s2_grid', style={"color": "red"})

m

Map(center=[40, -98], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_te…

## Creating our inference grid

From this, we want to create a grid of square polygons with which we will later on
query the [Planetary Computer](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a)
Sentinel 2 dataset and clip the selected Sentinel 2 images.

In [4]:
import time
from geospatial_tools.vector import create_vector_grid_parallel, to_geopackage, select_polygons_by_location
import pandas as pd
import numpy as np
from concurrent.futures import ProcessPoolExecutor

In [32]:
# Edge length of one grid cell.
# NOTE(review): presumably expressed in CRS units (metres for EPSG:5070) — confirm
# against `create_vector_grid_parallel`.
grid_size = 5000
# Extent of the USA layer as (minx, miny, maxx, maxy); the grid is built over this box.
bbox = usa_polygon.total_bounds

In [None]:
# Build the square-cell grid over the bounding box of the USA layer and save it.
# `time.perf_counter()` is used for the duration: unlike `time.time()` it is
# monotonic, so the measurement cannot be skewed by system clock adjustments.
start = time.perf_counter()
print("Starting processing for [create_vector_grid_parallel]")
grid_parallel = create_vector_grid_parallel(bounding_box=bbox, grid_size=grid_size, crs="EPSG:5070")
stop = time.perf_counter()
print(f"Printing len(grid_parallel) to check if grid contains same amount of polygons : {len(grid_parallel)}")
print(f"Time taken to create parallel grid: {stop - start}")
# The file name is relative, so the GeoPackage lands in the current working directory.
to_geopackage(gdf=grid_parallel, filename="polygon_grid.gpkg")

Starting processing for [create_vector_grid_parallel]
[2024-06-03 16:00:29] INFO       [MainThread][geospatial_tools.vector] Creating grid coordinates for bounding box [[-2356113.74289801   310919.59963659  2258200.17691555  3165721.6501298 ]]
[2024-06-03 16:00:29] INFO       [MainThread][geospatial_tools.vector] Creating flattened grid coordinates
[2024-06-03 16:00:29] INFO       [MainThread][geospatial_tools.vector] Number of workers used: 16
[2024-06-03 16:00:29] INFO       [MainThread][geospatial_tools.vector] Allocating polygon array for [13175825] polygons
[2024-06-03 16:00:29] INFO       [MainThread][geospatial_tools.vector] Creating polygons from chunk


### Selecting the useful polygons

Now, since our grid was created using the extent of our input polygon (continental USA), we need to filter out the polygons that do not intersect with it.

Doing this in Python is not the most efficient way to do things, but since it's a step that shouldn't be done over and over, it's not that critical.

If you ever need to do this step efficiently because the data is too big or too complex, it would be better to go through QGIS, PyQGIS, GDAL or 
some other more efficient tool for this operation. 

In [None]:
# Keep only the grid cells selected by location against the USA polygon.
# `time.perf_counter()` is used for the duration: unlike `time.time()` it is
# monotonic, so the measurement cannot be skewed by system clock adjustments.
start = time.perf_counter()
print("Starting intersect selection using for loop")
intersecting_polygons = select_polygons_by_location(grid_parallel, usa_polygon)
stop = time.perf_counter()
print(f"Time taken to intersect using for loop: {stop - start}")
# Optionally, save to a new file, e.g.:
# to_geopackage(gdf=intersecting_polygons, filename="intersecting_polygons.gpkg")

### Visualizing the selected polygons

This will take more or less time, depending on the number of polygons. 

In [None]:
# Overlay the selected grid cells on the map created earlier, then display it again.
# `add_gdf` takes the layer's display name through `layer_name` (not `layer`).
m.add_gdf(intersecting_polygons, layer_name='intersecting_polygons', style={"color": "blue"})
m

In [1]:
# Check which planetary-computer package version is installed (recorded output: 1.0.0).
%pip list | grep planetary

planetary-computer        1.0.0
Note: you may need to restart the kernel to use updated packages.


## Exploring S2 STAC catalog tools

In [1]:
from pathlib import Path

import rasterio

from geospatial_tools import DATA_DIR
from geospatial_tools.raster import reproject_raster
from geospatial_tools.stac import StacSearch, PLANETARY_COMPUTER
from geospatial_tools.utils import create_date_range_for_specific_period

In [2]:

# Search window: the recorded output shows one June 1st - July 31st range per year.
start_year = 2021
end_year = 2023
start_month = 6
end_month = 7
date_ranges = create_date_range_for_specific_period(
    start_year=start_year,
    end_year=end_year,
    start_month_range=start_month,
    end_month_range=end_month,
)

search_client = StacSearch(PLANETARY_COMPUTER)

collection = "sentinel-2-l2a"
tile_ids = ["10SGE"]
# Keep only scenes of the requested MGRS tile(s) whose cloud cover is below 1 (percent).
query = {"eo:cloud_cover": {"lt": 1}, "s2:mgrs_tile": {"in": tile_ids}}
# Least cloudy scenes first.
sortby = [{"field": "properties.eo:cloud_cover", "direction": "asc"}]

results = search_client.stac_api_search_for_date_ranges(
    date_ranges=date_ranges,
    collections=collection,
    query=query,
    sortby=sortby,
    max_items=20,
)

sorted_items = search_client.sort_results_by_cloud_coverage()

# The cloud-cover filter is strict, so a search can legitimately match nothing.
# Report that explicitly instead of failing with an IndexError on `sorted_items[0]`.
if sorted_items:
    optimal_result = sorted_items[0]
    print(f"Optimal result: {optimal_result.id}, {optimal_result.datetime}, {optimal_result.properties['eo:cloud_cover']}")
else:
    optimal_result = None
    print("No Sentinel 2 item matched the search criteria.")

2021-06-01T00:00:00Z/2021-07-31T23:59:59Z
2022-06-01T00:00:00Z/2022-07-31T23:59:59Z
[2024-07-31 18:07:33] INFO       [MainThread][geospatial_tools.stac] S2B_MSIL2A_20210720T183919_R070_T10SGE_20210721T052505, 2021-07-20 18:39:19.024000+00:00, 0.202279
[2024-07-31 18:07:33] INFO       [MainThread][geospatial_tools.stac] S2B_MSIL2A_20210713T184919_R113_T10SGE_20210714T084909, 2021-07-13 18:49:19.024000+00:00, 0.002955
[2024-07-31 18:07:33] INFO       [MainThread][geospatial_tools.stac] S2B_MSIL2A_20210710T183919_R070_T10SGE_20210711T034836, 2021-07-10 18:39:19.024000+00:00, 0.237209
[2024-07-31 18:07:33] INFO       [MainThread][geospatial_tools.stac] S2B_MSIL2A_20210630T183919_R070_T10SGE_20210701T140951, 2021-06-30 18:39:19.024000+00:00, 0.228867
[2024-07-31 18:07:33] INFO       [MainThread][geospatial_tools.stac] S2B_MSIL2A_20210623T184919_R113_T10SGE_20210626T013807, 2021-06-23 18:49:19.024000+00:00, 0.008795
[2024-07-31 18:07:33] INFO       [MainThread][geospatial_tools.stac] S2B_MSI

In [3]:
# Assets to download for the best cloud-cover result of the search above.
bands = ["B02", "B03", "B04", "B08", "visual"]
# Built with the `/` operator, like the other DATA_DIR-based paths in this notebook,
# instead of round-tripping the directory through an f-string.
file_base_path = DATA_DIR / "sentinel-2" / "test"
best_result = search_client.download_best_cloud_cover_results(bands=bands, base_directory=file_base_path)
best_result

[2024-07-31 18:07:35] INFO       [MainThread][geospatial_tools.stac] Downloading [S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858] ...
Downloading B02 from https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/10/S/GE/2022/07/23/S2A_MSIL2A_20220723T184931_N0400_R113_T10SGE_20220725T180858.SAFE/GRANULE/L2A_T10SGE_A037004_20220723T190242/IMG_DATA/R10m/T10SGE_20220723T184931_B02_10m.tif?st=2024-07-30T22%3A07%3A33Z&se=2024-07-31T22%3A52%3A33Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-07-31T21%3A01%3A08Z&ske=2024-08-07T21%3A01%3A08Z&sks=b&skv=2024-05-04&sig=1UV4VnkYp/s2aVRrmwjo%2BLNfKQuW3IjZxyF9ukCIdqI%3D
Downloaded /home/dev/projects/geospatial-tools/data/sentinel-2/test/S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858_B02.tif successfully.
Downloading B03 from https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/10/S/GE/2022/07/23/S2A_MSIL2A_20220723T184931_N0400_R113_T10SGE_20220725T180858

In [4]:
# Log the downloaded assets of `best_result` (ID, band and filename in the recorded output).
best_result.show_asset_items()

[2024-07-31 18:05:14] INFO       [MainThread][geospatial_tools.stac] Asset list for asset [S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858] : 
['ID: [S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858], Band: [B02], filename: [/home/dev/projects/geospatial-tools/data/sentinel-2/test/S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858_B02.tif]', 'ID: [S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858], Band: [B03], filename: [/home/dev/projects/geospatial-tools/data/sentinel-2/test/S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858_B03.tif]', 'ID: [S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858], Band: [B04], filename: [/home/dev/projects/geospatial-tools/data/sentinel-2/test/S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858_B04.tif]', 'ID: [S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858], Band: [B08], filename: [/home/dev/projects/geospatial-tools/data/sentinel-2/test/S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858_B08.tif]', 'ID: [S2A

In [25]:
# Settings shared by the merge cell below.
image_ids = ["S2A_MSIL2A_20220723T184931_R113_T10SGE_20220725T180858"]
# Built with the `/` operator, like the other DATA_DIR-based paths in this notebook,
# instead of round-tripping the directory through an f-string.
file_base_path = DATA_DIR / "sentinel-2"
# Band/asset names; the merge cell relies on this order to label the output bands.
bands = ["B02", "B03", "B04", "B08", "visual"]

In [83]:
# Merge the per-band GeoTIFFs of one Sentinel 2 product into a single multi-band
# GeoTIFF: every band of every input file becomes one band of the output.
# Relies on `bands` and `file_base_path` defined in the previous cell.

# NOTE: this product is not the one listed in `image_ids`; its per-band files are
# expected to already exist in `file_base_path`, named "<product id>_<band>.tif".
merge_product_id = "S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736"

# Derive the input paths from `file_base_path` instead of hard-coding an absolute,
# machine-specific location. There is one file per entry of `bands`, in the same
# order, so each file can be paired with its band name when writing.
downloaded_files = [file_base_path / f"{merge_product_id}_{band}.tif" for band in bands]

print("Try reading downloaded_files")
if downloaded_files:
    print("Inside if")
    merged_file = file_base_path / f"{merge_product_id}_merged.tif"

    # First pass: count the bands of every input to size the output file.
    total_band_count = 0
    for download_file in downloaded_files:
        print(download_file)
        with rasterio.open(download_file, 'r') as downloaded_image:
            print(downloaded_image.count)
            total_band_count += downloaded_image.count

    print(total_band_count)

    # The output metadata is copied from the first input; only the band count changes.
    # NOTE(review): every band is therefore written with the first file's dtype and
    # grid — confirm that this is acceptable for the `visual` asset.
    with rasterio.open(downloaded_files[0]) as meta_source:
        meta = meta_source.meta
        print(meta)
        meta.update(count=total_band_count)

    # Second pass: copy the bands one by one. Band indexes are 1-based, and
    # `merged_image_index` keeps advancing across input files. Multi-band inputs
    # get a "-<n>" suffix on their description (e.g. "visual-1").
    merged_image_index = 1
    with rasterio.open(merged_file, 'w', **meta) as merged_sentinel_image:
        for file, band_name in zip(downloaded_files, bands):
            print(f"Writing band image: {file}")
            with rasterio.open(file) as sentinel_band_image:
                num_of_bands = sentinel_band_image.count
                for sentinel_band_image_index in range(1, num_of_bands + 1):
                    print(f"writing band {sentinel_band_image_index}")
                    print(f"writing merged index {merged_image_index}")
                    merged_sentinel_image.write_band(merged_image_index, sentinel_band_image.read(sentinel_band_image_index))
                    description = band_name
                    if num_of_bands > 1:
                        description = f"{description}-{sentinel_band_image_index}"
                    print(band_name)
                    merged_sentinel_image.set_band_description(merged_image_index, description)
                    merged_sentinel_image.update_tags(merged_image_index, **sentinel_band_image.tags(sentinel_band_image_index))
                    merged_image_index += 1

    print(f"Merged file created at {merged_file}")

    # Optional follow-ups (not enabled): reproject `merged_file` to EPSG:5070 with
    # `reproject_raster`, then delete the per-band files once they are merged.

Try reading downloaded_files
Inside if
/home/dev/projects/geospatial-tools/data/sentinel-2/S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_B02.tif
1
/home/dev/projects/geospatial-tools/data/sentinel-2/S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_B03.tif
1
/home/dev/projects/geospatial-tools/data/sentinel-2/S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_B04.tif
1
/home/dev/projects/geospatial-tools/data/sentinel-2/S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_B08.tif
1
/home/dev/projects/geospatial-tools/data/sentinel-2/S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_visual.tif
3
7
{'driver': 'GTiff', 'dtype': 'uint16', 'nodata': 0.0, 'width': 10980, 'height': 10980, 'count': 1, 'crs': CRS.from_epsg(32610), 'transform': Affine(10.0, 0.0, 699960.0,
       0.0, -10.0, 4000020.0)}
Writing band image: /home/dev/projects/geospatial-tools/data/sentinel-2/S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_B02.tif
writing band 1
writing merged index 

In [84]:
# Inspect the merged GeoTIFF: metadata, band count and per-band descriptions.
# The path is rebuilt from `file_base_path` so that the cell does not depend on an
# absolute, machine-specific location.
merged_raster_path = file_base_path / "S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_merged.tif"
with rasterio.open(merged_raster_path, 'r') as sentinel_image:
    print(sentinel_image.meta)
    num_of_bands = sentinel_image.count
    print(num_of_bands)
    print(sentinel_image.descriptions)
    # Band indexes are 1-based.
    for band_index in range(1, num_of_bands + 1):
        print(band_index)

{'driver': 'GTiff', 'dtype': 'uint16', 'nodata': 0.0, 'width': 10980, 'height': 10980, 'count': 7, 'crs': CRS.from_epsg(32610), 'transform': Affine(10.0, 0.0, 699960.0,
       0.0, -10.0, 4000020.0)}
7
('B02', 'B03', 'B04', 'B08', 'visual-1', 'visual-2', 'visual-3')
1
2
3
4
5
6
7


In [85]:
# Inspect one of the single-band inputs (B02) for comparison with the merged file:
# metadata, band count and per-band descriptions.
# The path is rebuilt from `file_base_path` so that the cell does not depend on an
# absolute, machine-specific location.
b02_raster_path = file_base_path / "S2B_MSIL2A_20220615T183919_R070_T10SGE_20220618T191736_B02.tif"
with rasterio.open(b02_raster_path, 'r') as sentinel_image:
    print(sentinel_image.meta)
    num_of_bands = sentinel_image.count
    print(num_of_bands)
    print(sentinel_image.descriptions)
    # Band indexes are 1-based.
    for band_index in range(1, num_of_bands + 1):
        print(band_index)

{'driver': 'GTiff', 'dtype': 'uint16', 'nodata': 0.0, 'width': 10980, 'height': 10980, 'count': 1, 'crs': CRS.from_epsg(32610), 'transform': Affine(10.0, 0.0, 699960.0,
       0.0, -10.0, 4000020.0)}
1
(None,)
1
