# This notebook downloads eHydro bathymetry data from the USACE ArcGIS REST repository and retrieves cloud-masked imagery of the same location at the same time, for training Satellite Derived Bathymetry (SDB) model(s) for the National Channel Framework (NCF)

In [15]:
import geopandas as gpd
import pandas as pd
import fiona
import numpy as np
from shapely.geometry import Polygon
from shapely.wkt import loads
from rasterio.features import rasterize
from rasterio.transform import from_origin
from datetime import datetime, timedelta
import rasterio
import matplotlib.pyplot as plt
from tqdm import tqdm
import requests
import os
import zipfile
from pyproj import CRS
import re
from osgeo import gdal

# Functions

In [52]:
def ehydro_date_convert(time):
    """Convert an epoch timestamp in milliseconds to a ``YYYY-MM-DD`` string.

    Args:
        time: Milliseconds elapsed since 1970-01-01 00:00:00 UTC.

    Returns:
        The corresponding UTC calendar date formatted with ``'%Y-%m-%d'``.
    """
    # Plain epoch arithmetic replaces datetime.utcfromtimestamp(), which is
    # deprecated since Python 3.12; the result is the same naive UTC datetime.
    epoch = datetime(1970, 1, 1)
    return (epoch + timedelta(seconds=time / 1000)).strftime('%Y-%m-%d')

def download_file(url, destination, timeout=60):
    """Stream ``url`` to ``destination`` while showing a tqdm progress bar.

    Args:
        url: Address of the file to fetch.
        destination: Local path the response body is written to (overwritten
            if it already exists).
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. The
            check happens before ``destination`` is opened, so an HTML error
            page is never written out as if it were the requested file.
        requests.RequestException: For connection problems and timeouts.
    """
    # The context manager releases the streamed connection even on errors.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # Servers that omit Content-Length give a bar without a known total.
        total_size = int(response.headers.get('content-length', 0))
        with open(destination, 'wb') as file, tqdm(
            desc=f"Downloading {os.path.basename(destination)}",
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
                bar.update(len(chunk))

def export_rasterized_bathymetry_native_crs(gdf, outpath, resolution):
    """
    Burn bathymetry polygons into a single-band GeoTIFF in the layer's own CRS.

    Each polygon is rasterized with its "depthMean" value; cells not covered
    by any polygon are written as NaN, which is also the file's nodata value.

    Args:
        gdf: GeoDataFrame with bathymetry polygons and a "depthMean" column.
        outpath: Output file path for the raster.
        resolution: Cell size, in the units of ``gdf.crs``, used for both axes.

    Raises:
        ValueError: If ``gdf`` contains no features.
    """
    if gdf.empty:
        raise ValueError("Cannot rasterize an empty GeoDataFrame")

    # Bounding box of all features; the raster is anchored at its top-left corner
    xmin, ymin, xmax, ymax = gdf.total_bounds
    transform = from_origin(xmin, ymax, resolution, resolution)

    # Round the grid size UP so the last partial row/column along the
    # bottom/right edge is kept, and never produce a zero-sized raster.
    height = max(1, int(np.ceil((ymax - ymin) / resolution)))
    width = max(1, int(np.ceil((xmax - xmin) / resolution)))

    # (geometry, burn value) pairs in the form rasterize() expects
    shapes = list(zip(gdf.geometry, gdf["depthMean"]))

    raster = rasterize(
        shapes,
        out_shape=(height, width),
        transform=transform,
        fill=np.nan,  # No data outside polygons
        dtype="float32",
    )

    # Save the raster in its original CRS
    with rasterio.open(
        outpath,
        "w",
        driver="GTiff",
        height=height,
        width=width,
        count=1,
        dtype="float32",
        crs=gdf.crs,  # Keep the original CRS
        transform=transform,
        nodata=np.nan,
    ) as dst:
        dst.write(raster, 1)

    print(f"Raster in original CRS saved to {outpath}")

def export_rasterized_bathymetry_native_crs_2(gdf, outpath, resolution, method='cubic'):
    """
    Interpolate bathymetry onto a regular grid and save it as a GeoTIFF.

    The "depthMean" value of every feature is assigned to that feature's
    centroid, and the scattered points are interpolated with
    ``scipy.interpolate.griddata``. Cells the interpolator cannot fill are
    written as NaN, which is also the file's nodata value.

    Row 0 of the written array is the northernmost row, matching the
    top-left-origin transform stored in the file.

    Args:
        gdf: GeoDataFrame with bathymetry polygons and a "depthMean" column
        outpath: Output file path for the raster
        resolution: Grid resolution, in the units of ``gdf.crs``
        method: Interpolation method ('linear', 'cubic', 'nearest')
    """
    from scipy.interpolate import griddata

    # Get the bounding box
    xmin, ymin, xmax, ymax = gdf.total_bounds

    # Grid size, rounded up so the whole extent is covered
    width = max(1, int(np.ceil((xmax - xmin) / resolution)))
    height = max(1, int(np.ceil((ymax - ymin) / resolution)))

    # Sample at cell CENTRES. The y coordinates run from ymax downwards so
    # that array row 0 is the top of the raster, which is what the
    # from_origin(xmin, ymax, ...) transform below describes. Building the
    # rows upwards from ymin would write the data flipped north-south.
    x_centers = xmin + (np.arange(width) + 0.5) * resolution
    y_centers = ymax - (np.arange(height) + 0.5) * resolution
    xx, yy = np.meshgrid(x_centers, y_centers)

    # Extract points and values from geodataframe
    points = np.array([(geom.centroid.x, geom.centroid.y) for geom in gdf.geometry])
    values = gdf["depthMean"].values

    # Perform interpolation
    grid_z = griddata(points, values, (xx, yy), method=method, fill_value=np.nan)

    # Top-left corner of the raster and square cells of size `resolution`
    transform = from_origin(xmin, ymax, resolution, resolution)

    # Save the interpolated raster
    with rasterio.open(
        outpath,
        "w",
        driver="GTiff",
        height=grid_z.shape[0],
        width=grid_z.shape[1],
        count=1,
        dtype="float32",
        crs=gdf.crs,
        transform=transform,
        nodata=np.nan,
    ) as dst:
        dst.write(grid_z.astype('float32'), 1)

    print(f"Interpolated raster saved to {outpath}")

def visualize_raster(path):
    """Plot band 1 of the raster at ``path`` in its georeferenced extent.

    Args:
        path: Path of a raster file readable by rasterio.
    """
    with rasterio.open(path) as src:
        bathy = src.read(1)
        xmin, ymin, xmax, ymax = src.bounds

    plt.imshow(
        bathy,
        extent=(xmin, xmax, ymin, ymax),
        # Row 0 of a north-up raster is its TOP edge, so the image must be
        # drawn from the upper-left corner; origin="lower" would display a
        # correctly georeferenced raster upside down.
        origin="upper",
        cmap="viridis"
    )
    plt.colorbar(label="Depth (Feet)")
    plt.title("Rasterized Bathymetry")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()

def get_reach_date(surveyname):
    """Pull the reach/date identifier out of a survey name.

    The identifier is the first substring shaped like two letters, two
    alphanumerics, three or four letters and eight digits, joined by
    underscores (for example ``HS_06_CBB_20220325``).

    Args:
        surveyname: Text to search, such as a survey folder name.

    Returns:
        The matched identifier, or ``None`` when the name contains none.
    """
    found = re.search(
        r'([A-Za-z]{2}_[A-Za-z0-9]{2}_[A-Za-z]{3,4}_\d{8})',
        surveyname,
    )
    return found.group(1) if found else None

# Query bathy data
AVAILABLE FIELD NAMES:
- Field Name: OBJECTID, Type: esriFieldTypeOID
- Field Name: surveyjobidpk, Type: esriFieldTypeString
- Field Name: sdsid, Type: esriFieldTypeString
- Field Name: sdsfeaturename, Type: esriFieldTypeString
- Field Name: sdsmetadataid, Type: esriFieldTypeString
- Field Name: surveytype, Type: esriFieldTypeString
- Field Name: channelareaidfk, Type: esriFieldTypeString
- Field Name: dateuploaded, Type: esriFieldTypeDate
- Field Name: usacedistrictcode, Type: esriFieldTypeString
- Field Name: surveydatestart, Type: esriFieldTypeDate
- Field Name: surveydateend, Type: esriFieldTypeDate
- Field Name: sourcedatalocation, Type: esriFieldTypeString
- Field Name: sourceprojection, Type: esriFieldTypeString
- Field Name: mediaidfk, Type: esriFieldTypeString
- Field Name: projectedarea, Type: esriFieldTypeDouble
- Field Name: sdsfeaturedescription, Type: esriFieldTypeString
- Field Name: dateloadedenterprise, Type: esriFieldTypeDate
- Field Name: datenotified, Type: esriFieldTypeDate
- Field Name: sourcedatacontent, Type: esriFieldTypeString
- Field Name: plotsheetlocation, Type: esriFieldTypeString
- Field Name: sourceagency, Type: esriFieldTypeString
- Field Name: globalid, Type: esriFieldTypeGlobalID
- Field Name: Shape__Area, Type: esriFieldTypeDouble
- Field Name: Shape__Length, Type: esriFieldTypeDouble

In [3]:
# Search parameters for the eHydro query and local storage locations

s2_cloud_cov = 20 ## max percentage of cloud cover in Sentinel-2 multispectral imagery; lower means more visible surface (not used in this notebook)
# search_date = '2015-06-27'  # alternative threshold: everything since the Sentinel-2A launch date
search_date = '2019-01-01'      # lower bound on surveydatestart in the query; currently limited to roughly the past 5 years
usace_code = "CESWG"        # USACE district code used in the query and in the storage paths; Galveston District (for now)

NUM_OF_QUERIES = 5          # maximum number of paged requests sent to the service
QUERY_TIME_DELAY = 2        # delay between requests in seconds; only referenced by a commented-out sleep in the query cell
URL = "https://services7.arcgis.com/n1YM8pTrFmm7L4hs/ArcGIS/rest/services/eHydro_Survey_Data/FeatureServer/0/query"
DOWNLOAD_DIR = f'/home/clay/Documents/SDB/{usace_code}/bathy'
# Create the download folder up front so later cells can write into it
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

In [None]:
# Parameters for the paged query: surveys of one district that started on or
# after search_date, with geometry returned in EPSG:4326.
params = {
    'where': f"surveydatestart >= '{search_date}' AND usacedistrictcode='{usace_code}'",
    'outFields': '*',  # Retrieve all fields
    'resultRecordCount': 2000,  # Maximum records per request
    'resultOffset': 0,  # Starting offset
    'f': 'json',  # Output format
    'outSR': '4326',  # Spatial reference
}

all_features = []

for i in range(NUM_OF_QUERIES):
    response = requests.get(URL, params=params, timeout=60)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        break

    data = response.json()
    # ArcGIS reports query failures as an "error" object in an HTTP 200
    # response; without this check they would look like an empty result.
    if 'error' in data:
        print(f"Error: {data['error']}")
        break

    features = data.get('features', [])
    if not features:
        break
    all_features.extend(features)
    # Advance by what was actually received: the server may cap a page below
    # resultRecordCount, and stepping by the requested size would skip records.
    params['resultOffset'] += len(features)
    print(f"Retrieved {len(features)} features.")
    # time.sleep(QUERY_TIME_DELAY)  # optional pause between requests (needs `import time`)
else:
    # Every allowed request returned data, so the result set may be truncated.
    print(f"Stopped after {NUM_OF_QUERIES} requests; more records may remain (increase NUM_OF_QUERIES).")

Since a large number of the ACOLITE files are already processed, the cells below only match (and re-download) the surveys from this search that correspond to those existing files.

# Search for overlapping S2SAFE data
- if no matching images, remove the survey

In [None]:
# One survey job ID per returned feature, in the same order as all_features
surveykeys = [feat['attributes']['surveyjobidpk'] for feat in all_features]

In [None]:
# Only re-download the surveys whose imagery has already been processed with
# ACOLITE: entries in the s2_SAFE folder are matched against the survey IDs.
# The folder is derived from usace_code so it follows the selected district,
# like the other storage paths in this notebook.
acolite = f'/home/clay/Documents/SDB/{usace_code}/s2_SAFE'
acolitepaths = os.listdir(acolite)
# sorted() gives a reproducible download order; a bare set does not.
matching_strings = sorted(set(acolitepaths) & set(surveykeys))

In [None]:
# Map every survey ID to the location of its source data (the download link).
# Kept for reference - footprint reprojection that is currently not needed:
# gdf = gpd.GeoDataFrame(geometry=[loads(Polygon(feature['geometry']['rings'][0]).wkt)],
#                        crs=CRS.from_user_input(feature['attributes']['sourceprojection']))
# # Convert to EPSG:4326 (lat/lon)
# gdf_4326 = gdf.to_crs(epsg=4326)
# wkt_4326 = gdf_4326.geometry.iloc[0].wkt
bathyinfo = {
    surveykeys[idx]: feat['attributes']['sourcedatalocation']
    for idx, feat in enumerate(all_features)
}

In [None]:
# Download the archive of every matched survey. Each file is first written
# to "<name>.zip.part" and only renamed to "<name>.zip" once the transfer has
# finished, so an interrupted download never leaves a truncated .zip behind
# for the unzip step to trip over.
failed = []
for survey in matching_strings:
    file_path = os.path.join(DOWNLOAD_DIR, f"{survey}.zip")
    part_path = f"{file_path}.part"
    source_url = bathyinfo.get(survey)
    try:
        download_file(source_url, part_path)
        os.replace(part_path, file_path)
    except Exception as e:
        # Best effort: report the failure and carry on with the next survey.
        print(f"Failed to download {source_url}: {e}")
        failed.append(survey)
        if os.path.exists(part_path):
            os.remove(part_path)

print('='*250)
if failed:
    print(f"{len(matching_strings) - len(failed)} of {len(matching_strings)} files downloaded; failed: {failed}")
else:
    print("All files downloaded.")

# Alternative: download every survey returned by the query instead of only
# the matched ones.
# for survey, zip_url in bathyinfo.items():
#     file_path = os.path.join(DOWNLOAD_DIR, f"{survey}.zip")
#     try:
#         download_file(zip_url, file_path)
#     except Exception as e:
#         print(f"Failed to download {zip_url}: {e}")

# print('='*250)
# print("All files downloaded.")

# Unzip downloaded data

In [4]:
# Extract every downloaded archive into a folder named after it, then delete
# the archive. Afterwards list the survey folders present in DOWNLOAD_DIR.
zipnames = [f[:-4] for f in os.listdir(DOWNLOAD_DIR) if f.endswith('.zip')]
for name in zipnames:
    zipfile_path = os.path.join(DOWNLOAD_DIR, f'{name}.zip')
    try:
        with zipfile.ZipFile(zipfile_path, 'r') as zip_ref:
            zip_ref.extractall(zipfile_path[:-4])
    except zipfile.BadZipFile as e:
        # A corrupt archive should not stop the remaining ones from extracting.
        print(f"Skipping {zipfile_path}: {e}")
        continue
    # Delete only after the archive is closed; removing an open file fails on
    # some platforms.
    os.remove(zipfile_path)

# Keep directories only, so leftover files (e.g. an archive that could not be
# extracted) are not mistaken for survey folders.
surveynames = [
    f for f in os.listdir(DOWNLOAD_DIR)
    if not f.startswith('.') and os.path.isdir(os.path.join(DOWNLOAD_DIR, f))
]

# Get the .gdb file, specifically the "Bathymetry_Vector" layer and the "depthMean" and "geometry" columns
- gets bathymetry raster in the native crs within eHydro
- Sentinel-2 retrieval notebook will get the bbox and convert to EPSG:4326 for image searching
- Outputs in 10 ft resolution to match CSAT, but uses a simpler method since it will be reprojected to a coarser 10 meter resolution to match S2 anyways

Maybe also try TIN interpolation (or something similar) to test whether creating the bathy raster from the contour lines would work well

In [44]:
# For every extracted survey: read the "Bathymetry_Vector" layer of its file
# geodatabase and write an interpolated depth raster named after the survey's
# reach/date ID. gdbinfo maps survey folder name -> [GeoDataFrame, raster path].
gdbinfo = {}
output_dir = f'/media/clay/Crucial/SDB/{usace_code}/bathy_rasters'
os.makedirs(output_dir, exist_ok=True)

for name in surveynames:
    folder_path = os.path.join(DOWNLOAD_DIR, name)
    if not os.path.isdir(folder_path):
        print(f"Skipping {name}: not a folder")
        continue

    # Without an ID every such survey would be written to the same "None.tif"
    reach_id = get_reach_date(name)
    if reach_id is None:
        print(f"Skipping {name}: no reach/date ID found in the survey name")
        continue

    gdbfiles = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.gdb')]
    if not gdbfiles:
        print(f"Skipping {name}: no .gdb found in {folder_path}")
        continue
    gdbfile = gdbfiles[0]

    layers = fiona.listlayers(gdbfile)
    if 'Bathymetry_Vector' not in layers:
        print(f"Skipping {name}: layer 'Bathymetry_Vector' not found (available: {layers})")
        continue

    gdf = gpd.read_file(gdbfile, layer='Bathymetry_Vector')

    outfile = os.path.join(output_dir, f"{reach_id}.tif")

    # Cell size of 10, in the units of the layer's CRS
    export_rasterized_bathymetry_native_crs_2(gdf, outfile, 10)
    gdbinfo[name] = [gdf, outfile]

Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/HS_06_CBB_20220325.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/HS_01_BRF_20230921.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/VT_03_MME_20230814.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/CC_13_TTB_20240808.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/CO_03_MFF_20220303.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/GI_04_GCC_20240328.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/TC_01_BRT_20240716.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/HS_07_BGB_20221001.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/BT_01_FLA_20231120.tif
Interpolated raster saved to /media/clay/Crucial/SDB/CESWG/bathy_rasters/GA_02_ENT_20190813.tif
Interpolated raster saved to /media/clay

# Now, go to 01b_get_s2.ipynb to use the extent of the valid data in the created bathymetry rasters to get cloud-masked Sentinel-2 L2A products from Google Earth Engine.