# Download Sentinel-2 data for ASO rasters

Given an ASO raster, find Sentinel-2 scenes over the same area within a week of lidar acqusition, then choose scene with most snow pixels (find max coverage area and least cloud pixels).

In [None]:
# based on exmaples from
# https://planetarycomputer.microsoft.com/docs/tutorials/cloudless-mosaic-sentinel2/
# https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a#Example-Notebook
from pystac.extensions.eo import EOExtension as eo
import pystac_client
import planetary_computer
import glob
import rioxarray as rxr
import re, os
import datetime
import pandas as pd
from shapely.geometry import box
import odc.stac
import rasterio as rio
from rasterio.enums import Resampling

In [None]:
def sentinel2_for_aso(aso_raster_fn, dataset_path):
    
    time = pd.to_datetime(re.search(r"(\d{4}\d{2}\d{2})", aso_raster_fn).group())
    week_before = (time - datetime.timedelta(weeks=0.2)).strftime('%Y-%m-%d')
    week_after = (time + datetime.timedelta(weeks=0.2)).strftime('%Y-%m-%d')
    time_of_interest = f'{week_before}/{week_after}'
    
    aso_raster = rxr.open_rasterio(aso_raster_fn).squeeze()
    bounds_latlon = box(*aso_raster.rio.transform_bounds("EPSG:4326"))
    
    catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace)

    search = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=bounds_latlon,
        datetime=time_of_interest)

    # Check how many items were returned
    items = search.item_collection()
    print(f"Returned {len(items)} Items")
    
    sentinel2_stac = odc.stac.load(items,chunks={"x": 2048, "y": 2048},resolution=50, groupby='solar_day')
    sentinel2_stac_clipped = sentinel2_stac.rio.clip_box(*bounds_latlon.bounds,crs="EPSG:4326")
    scl = sentinel2_stac_clipped['SCL'].rio.reproject_match(aso_raster, resampling=rio.enums.Resampling.bilinear).where(aso_raster>=0)
    classes = [ #SCL classes here: https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/scene-classification/
    #0,   #No Data (Missing data)	#000000	
    #1,   #Saturated or defective pixel	#ff0000	
    #2,   #Topographic casted shadows (called "Dark features/Shadows" for data before 2022-01-25)	#2f2f2f	
    #3,   #Cloud shadows	#643200	
    4,   #Vegetation	#00a000	
    5,   #Not-vegetated	#ffe65a	
    #6,   #Water	#0000ff	
    #7,   #Unclassified	#808080	
    #8,   #Cloud medium probability	#c0c0c0	
    #9,   #Cloud high probability	#ffffff	
    #10,   #Thin cirrus	#64c8ff	
    11    #Snow or ice      
    ]
    
    idx_least_clouds = scl.where(scl.isin(classes)).sum(dim=['x','y']).idxmax()
    sentinel2_best_lowcloud = sentinel2_stac_clipped.sel(time=idx_least_clouds)
    
    # Ensuring directory exists for saving data
    os.makedirs(os.path.join(dataset_path, 'sentinel-2'), exist_ok=True)
    
    sentinel2_best_lowcloud.to_netcdf(os.path.join(dataset_path, 'sentinel-2', f'{pd.to_datetime(idx_least_clouds.values).strftime("%Y%m%d")}_for_{aso_raster_fn.replace('\\','/').split("/")[-1][:-4]}.nc'))    
    #return sentinel2_best_lowcloud

In [None]:
aso_raster_fns = glob.glob(r"/home/ayushg12/ML_GEO2024_ayushg12/mlgeo-2024-deep-snow/final_data/ASO_50m_SD_cleaned/utm12n/ASO*.tif")
dataset_path = "/home/ayushg12/ML_GEO2024_ayushg12/mlgeo-2024-deep-snow/final_data"

In [7]:
len(aso_raster_fns)

9

In [None]:
for i, aso_raster_fn in enumerate(aso_raster_fns):
    error_list = []
    print(f'----\nworking on {aso_raster_fn.replace('\\','/').split("/")[-1]}, {i+1}/{len(aso_raster_fns)}\n----')
    # try: 
    sentinel2_for_aso(aso_raster_fn, dataset_path)
    # except:
    #     print('error, skipping')
    #     error_list.append(aso_raster_fn)

----
working on ASO_50M_SD_WindRiver_20220611_clean.tif, 1/9
----
Returned 2 Items
----
working on ASO_50M_SD_GreenRiver_20220611_clean.tif, 2/9
----
Returned 2 Items
----
working on ASO_50M_SD_Dolores_20230525_clean.tif, 3/9
----
Returned 8 Items
----
working on ASO_50M_SD_Dolores_20220415_clean.tif, 4/9
----
Returned 8 Items
----
working on ASO_50M_SD_Dolores_20230406_clean.tif, 5/9
----
Returned 8 Items
----
working on ASO_50M_SD_Dolores_20220510_clean.tif, 6/9
----
Returned 8 Items
----
working on ASO_50M_SD_USUTLC_20210318_clean.tif, 7/9
----
Returned 4 Items
----
working on ASO_50M_SD_Dolores_20210420_clean.tif, 8/9
----
Returned 8 Items
----
working on ASO_50M_SD_Dolores_20210514_clean.tif, 9/9
----
Returned 4 Items


In [None]:
# have to do this one separately with only 0.2 week on either side due to a broken link. 
aso_raster_fn = r"/home/ayushg12/ML_GEO2024_ayushg12/mlgeo-2024-deep-snow/final_data/ASO_50m_SD_cleaned/utm10n/ASO_50M_SD_Yuba_20230405_clean.tif"
dataset_path = "/home/ayushg12/ML_GEO2024_ayushg12/mlgeo-2024-deep-snow/final_data"
sentinel2_for_aso(aso_raster_fn, dataset_path)

Returned 11 Items
