New notebook. The following steps will be taken:
1. Manually download eHydro time cubes for the target USACE district from the CIRP website: https://cirp.usace.army.mil/products/csat_districts.php
2. Filter all the time cubes to survey dates after the first light of Sentinel-2A (S2A), 2015-06-27.
3. Resample the surveys to 10-meter spatial resolution, matching that of the S2 L2W.nc products from ACOLITE.
4. Use ESA OData and OpenSearch to retrieve the needed .SAFE files.
5. Use ACOLITE to process the acquired .SAFE files and output the merged L2W.nc files.
6. Create similar time cubes for the S2 data, making sure to clip to only the non-cloudy pixels lying within the survey extents. This data will need to be reprojected to the appropriate CRS.
7. Save all data to an appropriate directory for use when training.

In [None]:
import sys, os
import re
import time
import zipfile
import requests
from datetime import datetime, timedelta
from collections import Counter
from scipy import interpolate

# Data handling and analysis
import numpy as np
import pandas as pd
import xarray as xr
import nctoolkit as nc

# Geospatial libraries
import rasterio
from osgeo import gdal
from pyproj import Transformer
import geopandas as gpd
import fiona
from shapely.geometry import Polygon
import asf_search as asf
import ee
from rasterio.features import rasterize
from rasterio.transform import from_origin

# Sentinel Hub
from sentinelhub import (
    SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubDownloadClient,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)

# Visualization
import matplotlib.pyplot as plt
from tqdm import tqdm

# Set working directory

In [None]:
# USACE district code; it selects the per-district data folder used below.
usace_code = 'CESWG'
# NOTE: machine-specific absolute path -- adjust when running elsewhere.
BATHY_PATH = f'/home/clay/Documents/SDB/{usace_code}/bathy'        # directory where the bathymetry data was downloaded

# Filter the downloaded bathy time cubes to only contain surveys after S2A first light

Filtering by calendar year instead of by a single S2A date cutoff; hopefully this will save some time.

In [None]:
# Sentinel-2A first-light date (YYYY-MM-DD), the cutoff named in the notebook intro.
S2A_DATE = '2015-06-27'

# The filtered cubes live in a 'bathy_filtered' folder that sits next to the
# raw bathymetry folder; create it up front so later cells can write into it.
_bathy_parent_dir = os.path.split(BATHY_PATH)[0]
FILTERED_BATHY_PATH = os.path.join(_bathy_parent_dir, 'bathy_filtered')
os.makedirs(FILTERED_BATHY_PATH, exist_ok=True)

In [None]:
# Split every downloaded time cube into one NetCDF file per calendar year.
# Per-year files keep storage manageable and allow training models on
# different date ranges (Landsat could later cover 15-30 m historical studies).
# TODO: update the metadata attributes of the filtered files.
#
# The `time` coordinate is compared against dates encoded as YYYYMMDD floats,
# so it is assumed to use that same encoding -- TODO confirm against the files.

first_light = datetime.strptime(S2A_DATE, '%Y-%m-%d')
now = datetime.now()

# Derive the year folders from the dates instead of hard-coding them, so the
# list does not go stale when the notebook is re-run in a later year.
years = [str(y) for y in range(first_light.year, now.year + 1)]
for year in years:
    os.makedirs(os.path.join(FILTERED_BATHY_PATH, year), exist_ok=True)

# List the input cubes once; the listing does not change between years.
bathy_files = sorted(
    os.path.join(BATHY_PATH, f) for f in os.listdir(BATHY_PATH) if f.endswith('.nc')
)

for path in bathy_files:
    # Keep the source open until every yearly subset has been written: with
    # chunks='auto' the data is only read when to_netcdf() runs. The context
    # manager also guarantees the file is closed if a write fails.
    with xr.open_dataset(path, chunks='auto') as ds:
        for year in years:
            # Clamp the calendar year to [S2A first light, today].
            start_dt = max(datetime(int(year), 1, 1), first_light)
            end_dt = min(datetime(int(year), 12, 31), now)

            start_num = float(start_dt.strftime('%Y%m%d'))
            end_num = float(end_dt.strftime('%Y%m%d'))

            # Inclusive bounds, so surveys dated exactly on Jan 1, Dec 31 or
            # the first-light date are not dropped from every yearly file.
            in_year = (ds.time >= start_num) & (ds.time <= end_num)
            filtered_ds = ds.sel(time=in_year)

            # Build the output path from its parts; a str.replace on 'bathy'
            # would also rewrite file names that happen to contain that word.
            out_path = os.path.join(FILTERED_BATHY_PATH, year, os.path.basename(path))
            filtered_ds.to_netcdf(out_path)

# Resample the filtered time cubes from 10 ft spatial resolution to 10 m to match the S2 files
- nctoolkit seems useful
- I think the structure of these .nc files is messy and they can't be loaded by nctoolkit. Will check the CSAT code to see how they extract the data. If that's not promising, I'll just go back to my original workflow.

In [None]:
# Resampled cubes go in a 'bathy_resampled' folder next to the filtered one.
RESAMPLED_BATHY_PATH = os.path.join(os.path.dirname(FILTERED_BATHY_PATH), 'bathy_resampled')
os.makedirs(RESAMPLED_BATHY_PATH, exist_ok=True)

# Year whose filtered cubes are used for the resampling experiment.
TARGETYEAR = '2024'
os.makedirs(os.path.join(RESAMPLED_BATHY_PATH, TARGETYEAR), exist_ok=True)

# Pick one filtered cube to experiment with. os.listdir returns entries in
# arbitrary order, so sort to make the choice reproducible, and fail with a
# clear message instead of a bare IndexError when the folder holds no cubes.
_target_dir = os.path.join(FILTERED_BATHY_PATH, TARGETYEAR)
_candidates = sorted(f for f in os.listdir(_target_dir) if f.endswith('.nc'))
if not _candidates:
    raise FileNotFoundError(f'No .nc files found in {_target_dir}')
test = os.path.join(_target_dir, _candidates[0])

In [None]:
# Load the sample cube chosen in the previous cell.
ds = xr.open_dataset(test)

# Number of consecutive points averaged together: 10 m expressed in feet
# (32.8084) divided by the 10 ft source spacing, truncated to an integer.
scale_factor = int(32.8084 / 10)
# NOTE(review): this coarsens along the 1-D `points` dimension only; whether
# that amounts to a true 2-D spatial resample depends on how the points are
# ordered in the file -- confirm.
ds_resampled = ds.coarsen(points=scale_factor, boundary="trim").mean()

# Compare the first survey (time step 0) before and after resampling.
time_index = 0
original_elevations = ds.elevations.isel(time=time_index)
resampled_elevations = ds_resampled.elevations.isel(time=time_index)

# Point coordinates for each version of the data.
lat_original, lon_original = ds.latitudes, ds.longitudes
lat_resampled, lon_resampled = ds_resampled.latitudes, ds_resampled.longitudes

# One entry per panel: (x, y, colour values, marker size, title).
panels = [
    (lon_original, lat_original, original_elevations, 1, "Original Data (10ft resolution)"),
    (lon_resampled, lat_resampled, resampled_elevations, 5, "Resampled Data (10m resolution)"),
]

plt.figure(figsize=(12, 6))
for position, (lon, lat, elev, marker_size, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.scatter(lon, lat, c=elev, cmap="viridis", s=marker_size)
    plt.colorbar(label="Elevation (m)")
    plt.title(panel_title)
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")

# Tidy the layout and render both panels.
plt.tight_layout()
plt.show()

In [None]:
# Plot up to the first 3 surveys (time steps) of the sample cube side by side.
# Assumes `elevations` is indexed by time along its first axis, as the
# positional indexing below requires -- TODO confirm.
n_surveys = min(3, ds.sizes['time'])

if n_surveys == 0:
    # Nothing to draw; avoid asking matplotlib for a zero-column figure.
    print('No surveys in the dataset; nothing to plot.')
else:
    # squeeze=False keeps `axes` an array even when only one panel is drawn.
    fig, axes = plt.subplots(1, n_surveys, figsize=(5 * n_surveys, 5), squeeze=False)
    axes = axes.ravel()

    # Survey dates for the titles; point coordinates are shared by all panels,
    # so look them up once instead of on every iteration.
    survey_dates = ds.variables['time'][:n_surveys]
    longitudes = ds.variables['longitudes'][:]
    latitudes = ds.variables['latitudes'][:]

    for i, ax in enumerate(axes):
        # Elevation data for this time step
        elevation_data = ds.variables['elevations'][i]

        # Scatter plot of the survey points coloured by elevation
        scatter = ax.scatter(
            longitudes,
            latitudes,
            c=elevation_data,
            cmap='viridis',
            s=1,
        )

        # Colorbar, title and axis labels
        plt.colorbar(scatter, ax=ax, label='Elevation')
        # Format the underlying value rather than the xarray object, so the
        # title shows the date value itself.
        ax.set_title(f'Survey Date: {survey_dates[i].values}')
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')

    plt.tight_layout()
    plt.show()