This notebook queries the Copernicus Data Space catalogue to retrieve the Sentinel-2 L1C .SAFE images that correspond to the eHydro hydrographic surveys (asf_search is imported but is not used by the cells below). These .SAFE files are then fed into ACOLITE for the required preprocessing. Once preprocessed, the hydrographic-survey rasters and the Sentinel-2 images are fed into 02_data_prep.ipynb to ensure that both cover the same area.

In [None]:
import os
from tqdm import tqdm
from osgeo import gdal
import rasterio
import numpy as np
from pyproj import Transformer
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
from collections import Counter
import asf_search as asf
from shapely.geometry import Polygon
import pandas as pd
import requests
from sentinelhub import (
    SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubDownloadClient,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)

In [None]:
# Text file containing the Sentinel Hub client id and client secret (first two
# lines) and the Copernicus browser username and password (last two lines).
# Create your own copy; it saves retyping the credentials in every session.

with open('/home/clay/Desktop/s2_login_stuff.txt') as f:
    # Each entry keeps its trailing newline, exactly as readlines() would.
    lines = list(f)

In [None]:
def get_access_token(username: str, password: str) -> str:
    """Request an access token from the Copernicus Data Space identity service.

    Sends a password-grant request for the ``cdse-public`` client and returns
    the ``access_token`` field of the JSON response.

    Args:
        username: Account user name.
        password: Account password.

    Returns:
        The access token string.

    Raises:
        Exception: If the request could not be sent or the server answered
            with an error status. The underlying ``requests`` error is
            chained as ``__cause__``.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
        "scope": "openid"
    }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
            timeout=60,  # do not hang forever on an unresponsive server
        )
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        # `e.response` is None when no response was received at all
        # (connection error, timeout); the body may also not be JSON, so
        # report the raw text instead of calling .json() here.
        details = e.response.text if e.response is not None else str(e)
        raise Exception(
            f"Access token creation failed. Response from the server was: {details}"
        ) from e
    return r.json()["access_token"]

In [None]:
# Sentinel Hub configuration pointed at the Copernicus Data Space endpoints.
config = SHConfig()
# The first two lines of the login file hold the client id and secret.
# strip() instead of [:-1]: it also removes a Windows '\r' and does not chop a
# real character when a line has no trailing newline.
config.sh_client_id = lines[0].strip()
config.sh_client_secret = lines[1].strip()
config.sh_base_url = 'https://sh.dataspace.copernicus.eu'
config.sh_token_url = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token'

# Functions

In [None]:
def visualize_bathy_raster(path):
    """Show band 1 of the raster at `path` with a depth colorbar.

    The image is drawn over the dataset's bounds with the first array row at
    the bottom of the axes (``origin="lower"``).
    """
    with rasterio.open(path) as dataset:
        depth_grid = dataset.read(1)
        left, bottom, right, top = dataset.bounds

    image_options = {
        "extent": (left, right, bottom, top),
        "origin": "lower",
        "cmap": "viridis",
    }
    plt.imshow(depth_grid, **image_options)
    plt.colorbar(label="Depth (Feet)")

    # Titles and axis labels.
    plt.title("Rasterized Bathymetry")
    plt.xlabel("X")
    plt.ylabel("Y")

    plt.show()

def extract_date(filepath):
    """Return the search window (day before, day after) for an eHydro survey file.

    The survey date is the first run of eight digits (``YYYYMMDD``) in the
    file name. Only if the file name contains none is the whole path searched,
    so digits in a parent directory cannot shadow the date in the file name.

    Args:
        filepath: Path (or bare name) of the survey file.

    Returns:
        Tuple ``(start, end)`` of ``'YYYY-MM-DD'`` strings: one day before and
        one day after the survey date.

    Raises:
        ValueError: If no eight-digit run is found, or the digits are not a
            valid calendar date.
    """
    filepath = os.fspath(filepath)
    match = re.search(r'\d{8}', os.path.basename(filepath)) or re.search(r'\d{8}', filepath)
    if match is None:
        raise ValueError(f"No YYYYMMDD date found in {filepath!r}")

    date = datetime.strptime(match.group(), '%Y%m%d')
    one_day = timedelta(days=1)
    return (date - one_day).strftime('%Y-%m-%d'), (date + one_day).strftime('%Y-%m-%d')

def extract_valid_bounds_to_epsg4326(raster_path):
    """Return the EPSG:4326 bounding box of the valid (non-NaN) pixels of a raster.

    Band 1 is read, the smallest row/column window containing every non-NaN
    pixel is located, and the outer corners of that window are reprojected
    from the raster CRS to EPSG:4326.

    Args:
        raster_path: Path of the raster to open with rasterio.

    Returns:
        Tuple ``(min_x, min_y, max_x, max_y)`` in EPSG:4326, x before y
        (the transformer is built with ``always_xy=True``).

    Raises:
        ValueError: If band 1 has no non-NaN pixel or the raster has no CRS.
    """
    with rasterio.open(raster_path) as src:
        data = src.read(1)  # Assuming a single band
        transform = src.transform  # Affine transform of the raster
        src_crs = src.crs  # Source CRS of the raster

    if src_crs is None:
        raise ValueError("Raster has no CRS; cannot convert bounds to EPSG:4326.")

    # Row and column indices of the valid (non-NaN) pixels
    valid_rows, valid_cols = np.nonzero(~np.isnan(data))
    if valid_rows.size == 0:
        raise ValueError("No valid data in the raster.")

    # Outer edges of the valid window: the upper-left corner of the first
    # pixel and the lower-right corner of the last one, so the last valid
    # row and column are included in the box.
    x0, y0 = rasterio.transform.xy(transform, valid_rows.min(), valid_cols.min(), offset="ul")
    x1, y1 = rasterio.transform.xy(transform, valid_rows.max(), valid_cols.max(), offset="lr")

    # Reproject all four corners and take the extremes. This keeps the result
    # ordered (min, min, max, max) whatever the row direction of the raster,
    # and covers the footprint when the reprojected box is not axis-aligned.
    xs = np.ravel([x0, x0, x1, x1]).astype(float)
    ys = np.ravel([y0, y1, y0, y1]).astype(float)
    transformer = Transformer.from_crs(src_crs, "EPSG:4326", always_xy=True)
    xs_4326, ys_4326 = transformer.transform(xs, ys)

    bounds_epsg4326 = (
        float(np.min(xs_4326)),
        float(np.min(ys_4326)),
        float(np.max(xs_4326)),
        float(np.max(ys_4326)),
    )
    return bounds_epsg4326

# Set working directory

In [None]:
# USACE code used as the per-district folder name in both paths below.
usace_code = 'CESWG'
# Folder that holds the bathymetry rasters read by this notebook.
BATHY_PATH = f'/home/clay/Documents/SDB/{usace_code}/bathy_rasters'        # STORAGE_DIR from 01a_get_ehydro.ipynb
# Folder that receives the Sentinel-2 downloads; created if it does not exist yet.
S2_PATH = f'/home/clay/Documents/SDB/{usace_code}/s2_SAFE'
os.makedirs(S2_PATH, exist_ok=True)

In [None]:
# Survey names are the bathymetry raster file names with the '.tif' suffix cut off.
surveynames = [
    raster_name[:-len('.tif')]
    for raster_name in os.listdir(BATHY_PATH)
    if raster_name.endswith('.tif')
]

# Search for appropriate Sentinel-2 L1C .SAFE files
- .SAFE needed for input into ACOLITE

In [None]:
# Username and password are the last two lines of the login file. strip() does
# not cut a real character when the final line has no trailing newline.
access_token = get_access_token(lines[-2].strip(), lines[-1].strip())
data_collection = 'SENTINEL-2'

# survey name -> (download URLs, product names) of the matching L1C products
survey_info = {}
for survey_name in surveynames:

    raster = os.path.join(BATHY_PATH, f"{survey_name}.tif")
    date = extract_date(raster)
    bounds = extract_valid_bounds_to_epsg4326(raster)

    # The catalogue's Intersects filter expects WKT, not a Python tuple, so
    # build a closed polygon ring from the bounding box. Sorting makes this
    # independent of which of the two y (or x) values is the larger one.
    west, east = sorted((bounds[0], bounds[2]))
    south, north = sorted((bounds[1], bounds[3]))
    footprint = (
        f"POLYGON(({west} {north},{east} {north},{east} {south},"
        f"{west} {south},{west} {north}))"
    )

    catalogue_response = requests.get(
        f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{footprint}') and ContentDate/Start gt {date[0]}T00:00:00.000Z and ContentDate/Start lt {date[1]}T00:00:00.000Z",
        timeout=60,
    )
    # Fail with the HTTP error instead of a KeyError on a missing 'value' key.
    catalogue_response.raise_for_status()
    results = pd.DataFrame.from_dict(catalogue_response.json().get('value', []))

    if results.empty:
        continue

    # Keep only the L1C products; pair each name with its own Id in one pass.
    urls = []
    s2_names = []
    for s2_name, product_id in zip(results.Name, results.Id):
        if 'L1C' in s2_name:
            urls.append(f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value")
            s2_names.append(s2_name)
    survey_info[survey_name] = (urls, s2_names)

# Download Sentinel-2 L1C .SAFE files from the Copernicus Data Space
- iterate by eHydro survey name and create a folder for each survey
- store all .SAFE downloads in the designated survey folder
- each download is named after its .SAFE product, as stored in `survey_info`
- mosaic together during ACOLITE processing, or in 02_data_prep.ipynb

Products that did not work:
- S2A_MSIL1C_20221025T165401_N0510_R026_T15RTN_20240728T042150.SAFE
- S2A_MSIL1C_20230413T164841_N0509_R026_T15RUN_20230413T220740.SAFE
- S2A_MSIL1C_20230413T164841_N0510_R026_T15RUN_20240824T151650.SAFE
- S2B_MSIL1C_20230518T164849_N0509_R026_T15RVP_20230518T220853.SAFE
- S2B_MSIL1C_20230518T164849_N0509_R026_T15RUP_20230518T220853.SAFE
- S2A_MSIL1C_20230304T165201_N0510_R026_T15RUP_20240819T071321.SAFE
- S2B_MSIL1C_20200811T164849_N0500_R026_T15RUP_20230510T185335.SAFE
- S2B_MSIL1C_20200811T164849_N0500_R026_T15RTN_20230510T185335.SAFE
- S2B_MSIL1C_20200811T164849_N0500_R026_T15RUN_20230510T185335.SAFE
- S2B_MSIL1C_20221013T170239_N0400_R069_T15RTN_20221013T215905.SAFE
- S2A_MSIL1C_20200501T165901_N0500_R069_T14RQT_20230330T120915.SAFE
- S2A_MSIL1C_20200501T165901_N0500_R069_T15RTM_20230330T120915.SAFE
- S2A_MSIL1C_20230722T164901_N0509_R026_T15RUN_20230722T215211.SAFE
- S2B_MSIL1C_20191218T170719_N0500_R069_T14RQS_20230607T033311.SAFE
- S2B_MSIL1C_20191218T170719_N0500_R069_T15RTM_20230607T033311.SAFE

In [None]:


# Main progress bar for surveys
for key, items in tqdm(survey_info.items(), desc="Processing surveys"):
    if len(items[0]) == 0:
        continue

    os.makedirs(os.path.join(S2_PATH, key), exist_ok=True)

    # One HTTP session per survey, closed when the survey is done.
    with requests.Session() as session:
        # Process each file within the survey
        for url, s2_name in zip(items[0], items[1]):
            # Request a fresh token right before each product. Previously the
            # headers were built before the refresh, so every survey was
            # downloaded with the token obtained for the previous one.
            access_token = get_access_token(lines[-2].strip(), lines[-1].strip())
            headers = {"Authorization": f"Bearer {access_token}"}
            session.headers.update(headers)

            file_path = os.path.join(S2_PATH, f"{key}/{s2_name[:-5]}.zip")
            # Download to a temporary name so an interrupted transfer never
            # leaves a truncated file under the final .zip name.
            partial_path = file_path + ".part"

            try:
                with session.get(url, headers=headers, stream=True) as response:
                    response.raise_for_status()  # Raise an exception for bad status codes

                    total_size = int(response.headers.get('content-length', 0))

                    # Progress bar for individual file download
                    with tqdm(
                        total=total_size,
                        unit='B',
                        unit_scale=True,
                        desc=f"Downloading {s2_name}",
                        leave=True  # Keep the progress bar after completion
                    ) as pbar, open(partial_path, "wb") as file:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                file.write(chunk)
                                pbar.update(len(chunk))

                os.replace(partial_path, file_path)

            except requests.exceptions.RequestException as e:
                print(f"Error downloading {s2_name}: {str(e)}")
                # Drop whatever was written before the failure.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
                continue

In [None]:
# unzip the files
# you may need to use sudo apt install parallel in a bash terminal
# !find . -type f -name "*.zip" | parallel unzip -o {} -d {//}

# Feed to ACOLITE
- mosaic images if multiple corresponding to a single eHydro survey, possible due to survey covering multiple S2 tiles
- will do ACOLITE processing in this notebook once all .SAFE files are downloaded
- will reproject Bathy and S2 rasters to common CRS in 02_data_prep.ipynb

In [None]:
# Path of the survey folder used for the ACOLITE test cells below (shown as the cell output).
os.path.join(S2_PATH, 'HS_03_BMP_20240226_CS')

In [None]:
# Full paths of every entry ending in '.SAFE' inside the test survey folder.
_test_survey_dir = os.path.join(S2_PATH, 'HS_03_BMP_20240226_CS')
test_set = [
    os.path.join(S2_PATH, f'HS_03_BMP_20240226_CS/{entry}')
    for entry in os.listdir(_test_survey_dir)
    if entry.endswith('.SAFE')
]

In [None]:
# Add the ACOLITE clone to the Python path and import it.
# The clone is expected at ~/tools/acolite; adjust the path if yours lives elsewhere.
import sys, os
user_home = os.path.expanduser("~")
sys.path.append(user_home+'/tools/acolite')
import acolite as ac

In [None]:
# Get the images of a single survey for ACOLITE testing.
# Pick a survey with multiple images so the mosaicking capabilities can be tested.
# Mosaicking may not be needed, since the model will be trained pixelwise.

# List the contents of the first .SAFE bundle of the test set.
os.listdir(test_set[0])

In [None]:

# EARTHDATA credentials: put them in the ~/.netrc file, or set them here:
# os.environ['EARTHDATA_u'] = ''
# os.environ['EARTHDATA_p'] = ''

# Scenes to process. A glob works as well:
# import glob
# bundles = glob.glob('/path/to/scene*')
bundles = test_set

# Output directory for the ACOLITE products of the test survey
out_dir = os.path.join(S2_PATH, 'HS_03_BMP_20240226_CS/acolite_test')
os.makedirs(out_dir, exist_ok=True)

# Bounds of the valid survey pixels. They are computed here but not passed to
# ACOLITE (the 'limit' setting below stays disabled).
# NOTE(review): the optional limit is a 4 element list [S, W, N, E], while
# extract_valid_bounds_to_epsg4326 returns (min_x, min_y, max_x, max_y);
# reorder before enabling it.
bounds = extract_valid_bounds_to_epsg4326(os.path.join(BATHY_PATH, 'HS_03_BMP_20240226_CS.tif'))

# Optional file with processing settings; None means the defaults are used
settings_file = None

# Process the bundles one at a time
for bundle in bundles:
    # Start from freshly parsed settings for every bundle
    settings = ac.acolite.settings.parse(settings_file)

    # Per-run settings. More can be added here, e.g.
    #   settings['limit'] = boundsx
    #   settings['l2w_parameters'] = ['t_nechad', 't_dogliotti']
    settings.update({
        'inputfile': bundle,
        'output': out_dir,
        's2_target_res': 10,
        'dsf_aot_estimate': 'fixed',
    })

    # Run ACOLITE on the current bundle
    ac.acolite.acolite_run(settings=settings)

# The above cell processes the L1C images (still need to mess with settings a bit), but does not clip to the boundaries of the survey. Clipping to only survey pixels will probably be done in 02_data_prep.ipynb