New notebook. The following steps will be taken:
1. Manually download eHydro time cubes for the target USACE district from the CIRP website. https://cirp.usace.army.mil/products/csat_districts.php
2. Filter all the time cubes to dates after first light of S2A (2015-06-27).
3. Resample the surveys to 10-meter spatial resolution, matching that of the S2 L2W.nc products from ACOLITE.
4. Use ESA OData and OpenSearch to retrieve the needed .SAFE files.
5. Use ACOLITE to process the acquired .SAFE files and output the merged L2W.nc files.
6. Create similar time cubes for the S2 data, making sure to clip to only non-cloudy pixels lying within the survey extents. This data will need to be reprojected to the appropriate CRS.
7. Save all data to an appropriate directory for use when training.

In [None]:
import sys, os
import re
import time
import zipfile
import requests
from datetime import datetime, timedelta
from collections import Counter

# Data handling and analysis
import numpy as np
import pandas as pd
import xarray as xr
import netCDF4 as nc

# Geospatial libraries
import rasterio
from osgeo import gdal
from pyproj import Transformer
import geopandas as gpd
import fiona
from shapely.geometry import Polygon
import asf_search as asf
import ee
from rasterio.features import rasterize
from rasterio.transform import from_origin

# Sentinel Hub
from sentinelhub import (
    SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubDownloadClient,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)

# Visualization
import matplotlib.pyplot as plt
from tqdm import tqdm

# Functions

In [None]:
def get_access_token(username: str, password: str) -> str:
    """Request an access token from the Copernicus Data Space identity service.

    Sends a password-grant request for the ``cdse-public`` client and returns
    the ``access_token`` field of the JSON response.

    Args:
        username: Account user name sent in the token request.
        password: Account password sent in the token request.

    Returns:
        The value of ``access_token`` in the server's JSON response.

    Raises:
        Exception: If the request could not be sent, or if the server
            answered with an HTTP error status.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
        "scope": "openid"
    }

    # Sending the request and checking its status are handled separately:
    # when the request itself fails there is no response object to report on.
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
    except requests.RequestException as e:
        raise Exception(
            f"Access token creation failed. No response was received from the server: {e}"
        ) from e

    try:
        r.raise_for_status()
    except requests.HTTPError as e:
        # The error body is not guaranteed to be JSON; fall back to raw text
        # so the failure report never raises a second, unrelated error.
        try:
            details = r.json()
        except ValueError:
            details = r.text
        raise Exception(
            f"Access token creation failed. Reponse from the server was: {details}"
        ) from e

    return r.json()["access_token"]

In [None]:
# Text file containing the username and password for the Copernicus browser,
# as well as the client id and secret for Sentinel Hub.
# You have to make your own; it saves typing the credentials in every session.
# Layout read below: first line = Sentinel Hub client id, second line = client secret.

with open('/home/clay/Desktop/s2_login_stuff.txt') as f:
    lines = f.readlines()

config = SHConfig()
# Strip only the line terminator. Slicing off the last character ([:-1])
# would truncate the credential itself when a line has no trailing newline
# (typically the last line of the file).
config.sh_client_id = lines[0].rstrip('\r\n')
config.sh_client_secret = lines[1].rstrip('\r\n')
config.sh_base_url = 'https://sh.dataspace.copernicus.eu'
config.sh_token_url = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token'

# Set working directory

In [None]:
# Identifier of the target USACE district; used as the per-district folder name below.
usace_code = 'CESWG'
BATHY_PATH = f'/home/clay/Documents/SDB/{usace_code}/bathy'        # directory where the bathymetry data was downloaded
# Directory for the Sentinel-2 .SAFE products; created here if it does not exist yet.
S2_PATH = f'/home/clay/Documents/SDB/{usace_code}/s2_SAFE'
os.makedirs(S2_PATH, exist_ok=True)

# Filter the downloaded bathy time cubes to only contain surveys after S2A first light

Going to filter by year instead of by a single S2A_DATE cutoff; hopefully this will save some time.

In [None]:
# First-light date of Sentinel-2A, as a 'YYYY-MM-DD' string.
S2A_DATE = '2015-06-27'
# Output directory for the filtered time cubes: a sibling of BATHY_PATH
# named 'bathy_filtered'. Created here if it does not exist yet.
FILTERED_BATHY_PATH = os.path.join(os.path.dirname(BATHY_PATH), 'bathy_filtered')
os.makedirs(FILTERED_BATHY_PATH, exist_ok=True)

In [None]:
# Split the .nc files by year; makes storage a bit easier.
# This will also allow for training models at different dates.
# Landsat could potentially be used for 15 to 30 m historical studies as well.
#
# For every time cube in BATHY_PATH, one file per year is written to
# FILTERED_BATHY_PATH/<year>/<same file name>, holding only the surveys whose
# `time` value falls inside that year's window.

s2a_first_light = datetime.strptime(S2A_DATE, '%Y-%m-%d')
today = datetime.now()

# Every year from S2A first light up to the current year, as strings
# (they double as output sub-directory names).
years = [str(y) for y in range(s2a_first_light.year, today.year + 1)]

# Per-year (start, end) bounds encoded as YYYYMMDD floats.
# NOTE(review): this assumes the cubes' `time` coordinate is stored as
# YYYYMMDD numbers, as the comparison below requires - confirm against the data.
year_bounds = {}
for year in years:
    # Clamp the calendar year to [S2A first light, today] so the first and the
    # current year get partial windows without special-casing specific years.
    start_dt = max(datetime(int(year), 1, 1), s2a_first_light)
    end_dt = min(datetime(int(year), 12, 31), today)
    year_bounds[year] = (
        float(start_dt.strftime('%Y%m%d')),
        float(end_dt.strftime('%Y%m%d')),
    )
    os.makedirs(os.path.join(FILTERED_BATHY_PATH, year), exist_ok=True)

# List the input directory once instead of once per year.
bathy_files = sorted(f for f in os.listdir(BATHY_PATH) if f.endswith('.nc'))

for fname in bathy_files:
    # Keep the source open until every subset has been written: the data is
    # loaded lazily (chunks='auto'), so it is only read during to_netcdf().
    with xr.open_dataset(os.path.join(BATHY_PATH, fname), chunks='auto') as ds:
        for year, (start_num, end_num) in year_bounds.items():
            # Inclusive bounds, so surveys dated exactly on the first or last
            # day of the window (e.g. Jan 1 / Dec 31) are not dropped.
            in_window = (ds.time >= start_num) & (ds.time <= end_num)
            filtered_ds = ds.sel(time=in_window)

            # Build the output path explicitly; a str.replace on 'bathy' would
            # also rewrite any other occurrence of that word in the path.
            filtered_ds.to_netcdf(os.path.join(FILTERED_BATHY_PATH, year, fname))