This notebook retrieves the Sentinel-2 L1C .SAFE products that correspond to the eHydro hydrographic surveys. It defines asf_search helper functions, but the Sentinel-2 search and download cells below query the Copernicus Data Space catalogue directly. The .SAFE files are then fed into ACOLITE for the required preprocessing. Once preprocessed, the hydrographic survey rasters and the Sentinel-2 images are passed to 02_data_prep.ipynb, which ensures both cover the same area.

In [None]:
import os
import getpass
from osgeo import gdal
import rasterio
import numpy as np
from pyproj import Transformer
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
from collections import Counter
import asf_search as asf
from shapely.geometry import Polygon
import pandas as pd
import requests
from sentinelhub import (
    SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubDownloadClient,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)

In [None]:
# Import credentials
# from creds import *


def get_access_token(username: str, password: str) -> str:
    """Request an access token from the Copernicus Data Space identity service.

    Sends a password-grant request for the ``cdse-public`` client and returns
    the ``access_token`` field of the JSON reply.

    Args:
        username: Copernicus Data Space account name.
        password: Password for that account.

    Returns:
        The access token string.

    Raises:
        Exception: If the request could not be sent or the server answered
            with an error status. The underlying ``requests`` error is
            chained as the cause.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
            timeout=60,
        )
        r.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # The server replied with an error status: report its body, falling
        # back to the raw text when the body is not valid JSON.
        try:
            detail = r.json()
        except ValueError:
            detail = r.text
        raise Exception(
            f"Access token creation failed. Response from the server was: {detail}"
        ) from e
    except requests.exceptions.RequestException as e:
        # No response object exists here (connection error, timeout, ...), so
        # there is no server body to report.
        raise Exception(
            f"Access token creation failed. No response from the server: {e}"
        ) from e
    return r.json()["access_token"]

# get_access_token() requires both a username and a password, so prompt for
# them at run time instead of keeping credentials in the notebook.
access_token = get_access_token(
    getpass.getpass("Enter your Copernicus username: "),
    getpass.getpass("Enter your Copernicus password: "),
)

In [None]:
# Sentinel Hub configuration pointed at the Copernicus Data Space endpoints.
config = SHConfig()
# OAuth client credentials are taken from the SH_CLIENT_ID / SH_CLIENT_SECRET
# environment variables when they are set; otherwise the values SHConfig()
# already holds are kept, so no secret has to be written into the notebook.
config.sh_client_id = os.environ.get("SH_CLIENT_ID", config.sh_client_id)
config.sh_client_secret = os.environ.get("SH_CLIENT_SECRET", config.sh_client_secret)
config.sh_base_url = 'https://sh.dataspace.copernicus.eu'
config.sh_token_url = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token'

# Functions

In [None]:
def asf_search_results(startdate, enddate, wkt_aoi, pform, proclevel, bmode, flightD, pol):
    """Run an ASF search and return it both as GeoJSON and as the raw result.

    Args:
        startdate: Passed to ``asf.search`` as ``start``.
        enddate: Passed to ``asf.search`` as ``end``.
        wkt_aoi: Passed to ``asf.search`` as ``intersectsWith``.
        pform: Passed to ``asf.search`` as ``platform``.
        proclevel: Single processing level; wrapped in a one-element list
            for ``processingLevel``.
        bmode: Passed to ``asf.search`` as ``beamMode``.
        flightD: Passed to ``asf.search`` as ``flightDirection``.
        pol: Passed to ``asf.search`` as ``polarization``.

    Returns:
        A tuple ``(geojson, results)``: the value of ``results.geojson()``
        followed by the object returned by ``asf.search``.
    """
    search_kwargs = {
        "platform": pform,
        "processingLevel": [proclevel],
        "start": startdate,
        "end": enddate,
        "intersectsWith": wkt_aoi,
        "beamMode": bmode,
        "flightDirection": flightD,
        "polarization": pol,
    }
    found = asf.search(**search_kwargs)
    as_geojson = found.geojson()
    return as_geojson, found

def asf_download(earthdatausr, earthdatapass, directory, metadata, results, direction):
    """Download ASF products into ``<directory>/<direction>/<path>/<frame>``.

    Each product is stored in a folder named after its own path and frame
    number, so scenes from the same path/frame end up together. Only the
    folders that actually receive a product are created.

    Args:
        earthdatausr: Earthdata user name used to authenticate the session.
        earthdatapass: Earthdata password used to authenticate the session.
        directory: Root directory for the downloads.
        metadata: GeoJSON-like dict whose ``features`` each carry
            ``pathNumber``, ``frameNumber`` and ``fileName`` properties.
        results: Iterable of ASF products, paired with ``metadata['features']``
            by position (assumes ``metadata`` was produced from these same
            results in the same order -- TODO confirm at the call site).
        direction: Name of the sub-directory placed directly under
            ``directory``.
    """
    # establish download session
    session = asf.ASFSession().auth_with_creds(earthdatausr, earthdatapass)

    # file name -> (path number, frame number, product); a file name that
    # appears more than once keeps its last entry, so it is downloaded once
    epoch_info = {}
    for feature, asfprod in zip(metadata['features'], results):
        props = feature["properties"]
        epoch_info[props["fileName"]] = (
            props["pathNumber"],
            props["frameNumber"],
            asfprod,
        )

    # download each product into the directory for its own path/frame pair
    for path, frame, asfprod in epoch_info.values():
        target_dir = os.path.join(directory, f"{direction}", f"{path}", f"{frame}")
        os.makedirs(target_dir, exist_ok=True)
        asfprod.download(path=target_dir, session=session)

def visualize_bathy_raster(path):
    """Plot band 1 of a bathymetry raster in its own coordinate system.

    Args:
        path: Path of a raster readable by ``rasterio.open``.
    """
    with rasterio.open(path) as src:
        bathy = src.read(1)
        # dataset bounds unpack as (left, bottom, right, top)
        left, bottom, right, top = src.bounds

    plt.imshow(
        bathy,
        extent=(left, right, bottom, top),
        # Row 0 of the array lies on the "top" bound, so it must be drawn at
        # the top of the extent; origin="lower" would mirror the raster
        # vertically relative to the axis coordinates.
        origin="upper",
        cmap="viridis"
    )
    plt.colorbar(label="Depth (Feet)")
    plt.title("Rasterized Bathymetry")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()

def extract_date(filepath):
    """Return the search window around the survey date found in *filepath*.

    The first run of eight consecutive digits in the path is read as a
    ``YYYYMMDD`` date.

    Args:
        filepath: Path (``str`` or path-like) of an eHydro file whose name
            contains the survey date.

    Returns:
        A tuple ``(start, end)`` of ``'YYYY-MM-DD'`` strings: the day before
        and the day after the survey date.

    Raises:
        ValueError: If the path contains no eight-digit run, or the digits do
            not form a valid calendar date.
    """
    text = os.fspath(filepath)
    match = re.search(r'\d{4}\d{2}\d{2}', text)
    if match is None:
        raise ValueError(f"No YYYYMMDD date found in {text!r}")
    date = datetime.strptime(match.group(), '%Y%m%d')
    one_day = timedelta(days=1)
    return (date - one_day).strftime('%Y-%m-%d'), (date + one_day).strftime('%Y-%m-%d')

def extract_valid_bounds_to_epsg4326(raster_path):
    """Return the EPSG:4326 bounding box of a raster's valid pixels as WKT.

    A pixel of band 1 counts as valid when it is neither NaN nor equal to the
    dataset's declared nodata value.

    Args:
        raster_path: Path of a raster readable by ``rasterio.open``.

    Returns:
        A ``POLYGON((...))`` WKT string in longitude/latitude order, listing
        the box corners clockwise from the top-left and closing on the first
        point.

    Raises:
        ValueError: If band 1 contains no valid pixel.
    """
    with rasterio.open(raster_path) as src:
        data = src.read(1)  # only the first band is inspected
        transform = src.transform
        src_crs = src.crs
        nodata = src.nodata

    # Valid = not NaN and not the declared nodata value (when there is one).
    valid_mask = ~np.isnan(data)
    if nodata is not None and not np.isnan(nodata):
        valid_mask &= data != nodata

    valid_rows, valid_cols = np.where(valid_mask)
    if valid_rows.size == 0:
        raise ValueError("No valid data in the raster.")

    min_row, max_row = valid_rows.min(), valid_rows.max()
    min_col, max_col = valid_cols.min(), valid_cols.max()

    # Outer edges of the valid block: the upper-left corner of the first valid
    # pixel and the lower-right corner of the last one, so the final row and
    # column are included in the box.
    x0, y0 = rasterio.transform.xy(transform, min_row, min_col, offset="ul")
    x1, y1 = rasterio.transform.xy(transform, max_row, max_col, offset="lr")

    # Reproject all four corners and take the extremes: after reprojection the
    # box is generally no longer axis-aligned, and which corner holds the
    # smaller y depends on the sign of the raster's pixel height.
    transformer = Transformer.from_crs(src_crs, "EPSG:4326", always_xy=True)
    lons, lats = transformer.transform([x0, x1, x1, x0], [y0, y0, y1, y1])
    west, east = float(min(lons)), float(max(lons))
    south, north = float(min(lats)), float(max(lats))

    # Polygon ring, clockwise from the top-left corner.
    coords = [
        (west, north),  # top-left
        (east, north),  # top-right
        (east, south),  # bottom-right
        (west, south),  # bottom-left
        (west, north),  # back to top-left to close the polygon
    ]

    # Format coordinates into a WKT string
    coord_str = ','.join(f'{x} {y}' for x, y in coords)
    return f'POLYGON(({coord_str}))'

# Set working directory

In [None]:
# Code used as the per-dataset folder name in both paths below.
usace_code = 'CESWG'
# Input folder holding the bathymetry rasters (.tif) that this notebook reads.
BATHY_PATH = f'/home/clay/Documents/SDB/{usace_code}/bathy_rasters'        # STORAGE_DIR from 01a_get_ehydro.ipynb
# Output folder for the downloaded Sentinel-2 archives.
S2_PATH = f'/home/clay/Documents/SDB/{usace_code}/s2_SAFE'
# Create the output folder up front; an existing folder is left untouched.
os.makedirs(S2_PATH, exist_ok=True)

In [None]:
# Survey names are the .tif file names in BATHY_PATH with the four-character
# extension removed.
surveynames = []
for entry in os.listdir(BATHY_PATH):
    if not entry.endswith('.tif'):
        continue
    surveynames.append(entry[:-4])

In [None]:
# survey_info = {}
# for name in surveynames:

#     raster = os.path.join(BATHY_PATH, f"{name}.tif")
#     date = extract_date(raster)
#     bounds = extract_valid_bounds_to_epsg4326(raster)
#     time_interval = date[0], date[1]  

#     survey_info[name] = [bounds, date]

# Search for appropriate Sentinel-2 L1C .SAFE files
- .SAFE needed for input into ACOLITE

In [None]:
data_collection = 'SENTINEL-2'

# Maps each matching L1C product name to its download URL.
survey_info = {}
for survey_name in surveynames:

    raster = os.path.join(BATHY_PATH, f"{survey_name}.tif")
    date = extract_date(raster)                        # (start, end) strings
    bounds = extract_valid_bounds_to_epsg4326(raster)  # WKT polygon

    # Catalogue query: products of the collection that intersect the survey
    # footprint and whose sensing start falls inside the date window.
    response = requests.get(
        f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{bounds}') and ContentDate/Start gt {date[0]}T00:00:00.000Z and ContentDate/Start lt {date[1]}T00:00:00.000Z",
        timeout=60,
    )
    # Fail on an HTTP error here rather than with a KeyError further down.
    response.raise_for_status()
    results = pd.DataFrame.from_dict(response.json().get('value', []))

    if results.empty:
        continue

    # Keep the first row per product name, then record every L1C product.
    unique_products = results.drop_duplicates(subset='Name')
    for product_name, product_id in zip(unique_products['Name'], unique_products['Id']):
        if 'L1C' in product_name:
            survey_info[product_name] = (
                f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"
            )

# Download the Sentinel-2 L1C .SAFE files from the Copernicus Data Space

In [None]:
# One authenticated session is shared by every download and closed at the end.
# NOTE(review): the token is fetched once, in an earlier cell; if it expires
# during a long run the status check below will raise -- confirm its lifetime.
with requests.Session() as session:
    session.headers.update({"Authorization": f"Bearer {access_token}"})

    for name, url in survey_info.items():
        # Product names are expected to end in ".SAFE"; drop that suffix for
        # the archive name and leave any other name untouched.
        stem = name[:-5] if name.endswith(".SAFE") else name
        zip_path = os.path.join(S2_PATH, f"{stem}.zip")

        with session.get(url, stream=True) as response:
            # Stop on an HTTP error instead of saving the error body as a .zip.
            response.raise_for_status()
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)

# Feed to ACOLITE
- will do ACOLITE processing in this notebook once all .SAFE files are downloaded
- will reproject Bathy and S2 rasters to common CRS in 02_data_prep.ipynb

In [None]:
# Run ACOLITE once per input scene, writing all outputs to a single directory.
# The paths and credentials below are placeholders to fill in before running.

# add the acolite clone (expected at ~/git/acolite) to the Python path;
# this must happen before `import acolite`
import sys, os
user_home = os.path.expanduser("~")
sys.path.append(user_home+'/git/acolite')
import acolite as ac

# add EARTHDATA_u and EARTHDATA_p
# (set to empty strings here; presumably ACOLITE reads them as Earthdata
# credentials -- confirm against the ACOLITE documentation)
os.environ['EARTHDATA_u'] = ''
os.environ['EARTHDATA_p'] = ''

# scenes to process (placeholder paths)
bundles = ['/path/to/scene1', '/path/to/scene2']
# alternatively use glob
# import glob
# bundles = glob.glob('/path/to/scene*')

# output directory (placeholder path), shared by every bundle
odir = '/path/to/output/directory'

# optional 4 element limit list [S, W, N, E]
# None is passed through unchanged as the 'limit' setting
limit = None

# optional file with processing settings
# if set to None defaults will be used
settings_file = None

# run through bundles
for bundle in bundles:
    # parse the settings anew for each bundle, before the per-bundle
    # values are written into them
    settings = ac.acolite.settings.parse(settings_file)
    # set settings provided above
    settings['limit'] = limit
    settings['inputfile'] = bundle
    settings['output'] = odir
    # other settings can also be provided here, e.g.
    # settings['s2_target_res'] = 60
    # settings['dsf_aot_estimate'] = 'fixed'
    # settings['l2w_parameters'] = ['t_nechad', 't_dogliotti']

    # process the current bundle
    ac.acolite.acolite_run(settings=settings)