# This notebook serves as my workflow for estimating NCF bathymetries using historical eHydro surveys and EO multispectral data. I will train a NN on eHydro bathy surveys to estimate waterway bathymetries from corrected surface reflectances
- get bathy surveys from https://services7.arcgis.com/n1YM8pTrFmm7L4hs/ArcGIS/rest/services/eHydro_Survey_Data/FeatureServer/0/query
- S2 imagery from GEE
- TF for NN

In [None]:
import requests
import os
import json
import ee
import geemap
import re
from datetime import datetime, timedelta
import time

In [None]:
## Start the Earth Engine session; set `project` to your own project name (as a string) before running
ee.Initialize(project='')

# Functions

In [None]:
def get_sentinel_imagery(aoi, start_date, end_date, s2_cloud_cov):
    """Build a Sentinel-2 image collection for one area and date range.

    Starting from 'COPERNICUS/S2_SR_HARMONIZED', the collection is limited to
    images intersecting `aoi` and falling in the `start_date`..`end_date`
    filter. Each image gets a 'date' property formatted as 'YYYYMMdd', images
    whose CLOUDY_PIXEL_PERCENTAGE is not below `s2_cloud_cov` are dropped, and
    the remainder is sorted by that 'date' property. Only bands B2, B3, B4,
    B8, B11 and B12 are kept, and every image is clipped to `aoi`.

    Only Sentinel-2 data is collected here.

    Args:
        aoi: geometry used both for the bounds filter and for clipping.
        start_date: start of the date filter (passed to ee.Date).
        end_date: end of the date filter (passed to ee.Date).
        s2_cloud_cov: upper limit for CLOUDY_PIXEL_PERCENTAGE.

    Returns:
        The filtered, sorted, band-selected and clipped image collection.
    """
    bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']

    def tag_with_date(img):
        # Store the acquisition date as a string property the sort below can use.
        return img.set('date', ee.Date(img.date()).format('YYYYMMdd'))

    def clip_to_aoi(img):
        return img.clip(aoi)

    collection = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    collection = collection.filterBounds(aoi)
    collection = collection.filterDate(ee.Date(start_date), ee.Date(end_date))
    collection = collection.map(tag_with_date)
    collection = collection.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', s2_cloud_cov))
    collection = collection.sort('date')
    collection = collection.select(bands)

    return collection.map(clip_to_aoi)

def add_rgb_to_map(image, map_object, num):
    """Add `image` to `map_object` as an RGB layer named '<num>_rgb'.

    The layer shows bands B4, B3 and B2 stretched between 0 and 2000.

    Args:
        image: image handed to `map_object.addLayer`.
        map_object: map exposing an `addLayer(image, vis_params, name)` method.
        num: value used as the prefix of the layer name.

    Returns:
        None.
    """
    # No image date is fetched here: the layer name only depends on `num`, so
    # a blocking getInfo() call per layer would be wasted work.
    vis_params = {'min': 0, 'max': 2000, 'bands': ['B4', 'B3', 'B2']}
    map_object.addLayer(image, vis_params, f'{num}_rgb')

def get_gee_search_dates(time):
    """Return the dates one day before and one day after a timestamp.

    Args:
        time: timestamp in milliseconds since 1970-01-01 00:00 UTC. (The
            parameter name hides the `time` module inside this function only.)

    Returns:
        A `(day_before, day_after)` tuple of 'YYYY-MM-DD' strings, computed
        in UTC.
    """
    # Plain epoch arithmetic replaces datetime.utcfromtimestamp(), which is
    # deprecated since Python 3.12 and platform-dependent for pre-1970 values.
    date_obj = datetime(1970, 1, 1) + timedelta(milliseconds=time)
    one_day = timedelta(days=1)
    return ((date_obj - one_day).strftime('%Y-%m-%d'), (date_obj + one_day).strftime('%Y-%m-%d'))

def ehydro_date_convert(time):
    """Convert a millisecond timestamp to a 'YYYY-MM-DD' date string in UTC.

    Args:
        time: timestamp in milliseconds since 1970-01-01 00:00 UTC. (The
            parameter name hides the `time` module inside this function only.)

    Returns:
        The UTC calendar date of the timestamp as a 'YYYY-MM-DD' string.
    """
    # Plain epoch arithmetic replaces datetime.utcfromtimestamp(), which is
    # deprecated since Python 3.12 and platform-dependent for pre-1970 values.
    moment = datetime(1970, 1, 1) + timedelta(milliseconds=time)
    return moment.strftime('%Y-%m-%d')

def request_all_features(url, params):
    """Fetch every feature matching a paged feature-service query.

    The query is repeated with an increasing 'resultOffset' until a page comes
    back empty or a request fails. Between pages the function pauses for
    QUERY_TIME_DELAY seconds (module-level setting).

    Args:
        url: query endpoint to request.
        params: query parameters. The dict is copied, so the caller's object
            is left untouched; 'resultOffset' defaults to 0 when absent.

    Returns:
        A list with the features of all pages retrieved before the loop
        stopped. Failures are printed, not raised, so the list may be partial.
    """
    # Work on a copy so the same params dict can be reused for another query.
    query = dict(params)
    query.setdefault('resultOffset', 0)

    all_features = []

    while True:
        response = requests.get(url, params=query)
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            break

        data = response.json()
        if 'error' in data:
            # The service can report a failed query inside an HTTP 200 body.
            print(f"Error: {data['error']}")
            break

        features = data.get('features', [])
        if not features:
            break
        all_features.extend(features)

        # Advance by the number of records actually returned: a server that
        # caps the page below 'resultRecordCount' would otherwise have the
        # records between the two sizes skipped.
        query['resultOffset'] += len(features)
        time.sleep(QUERY_TIME_DELAY)  # pause between pages

    return all_features

def request_subset_features(url, params):
    """Fetch a single page of features from a feature-service query.

    Exactly one request is sent. The query parameters are never changed by
    this function, so repeating the request would only return the same
    records again (and would never terminate while features keep coming back).

    Args:
        url: query endpoint to request.
        params: query parameters, passed through unchanged.

    Returns:
        A list of the features in the response, or an empty list when the
        request fails or the response holds no features. Failures are
        printed, not raised.
    """
    response = requests.get(url, params=params)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return []

    data = response.json()
    if 'error' in data:
        # The service can report a failed query inside an HTTP 200 body.
        print(f"Error: {data['error']}")
        return []

    return list(data.get('features') or [])

# Query bathy data
AVAILABLE FIELD NAMES:
- Field Name: OBJECTID, Type: esriFieldTypeOID
- Field Name: surveyjobidpk, Type: esriFieldTypeString
- Field Name: sdsid, Type: esriFieldTypeString
- Field Name: sdsfeaturename, Type: esriFieldTypeString
- Field Name: sdsmetadataid, Type: esriFieldTypeString
- Field Name: surveytype, Type: esriFieldTypeString
- Field Name: channelareaidfk, Type: esriFieldTypeString
- Field Name: dateuploaded, Type: esriFieldTypeDate
- Field Name: usacedistrictcode, Type: esriFieldTypeString
- Field Name: surveydatestart, Type: esriFieldTypeDate
- Field Name: surveydateend, Type: esriFieldTypeDate
- Field Name: sourcedatalocation, Type: esriFieldTypeString
- Field Name: sourceprojection, Type: esriFieldTypeString
- Field Name: mediaidfk, Type: esriFieldTypeString
- Field Name: projectedarea, Type: esriFieldTypeDouble
- Field Name: sdsfeaturedescription, Type: esriFieldTypeString
- Field Name: dateloadedenterprise, Type: esriFieldTypeDate
- Field Name: datenotified, Type: esriFieldTypeDate
- Field Name: sourcedatacontent, Type: esriFieldTypeString
- Field Name: plotsheetlocation, Type: esriFieldTypeString
- Field Name: sourceagency, Type: esriFieldTypeString
- Field Name: globalid, Type: esriFieldTypeGlobalID
- Field Name: Shape__Area, Type: esriFieldTypeDouble
- Field Name: Shape__Length, Type: esriFieldTypeDouble

For training the model, we will probably want to include options to filter by:
- usace district
- time of year (date and season)
- NCF ID
- survey type (single vs dual beam; XC, BD, AD, etc.)

In [None]:
# Search settings shared by the eHydro queries and the Sentinel-2 lookup

# eHydro survey feature-service query endpoint
URL = "https://services7.arcgis.com/n1YM8pTrFmm7L4hs/ArcGIS/rest/services/eHydro_Survey_Data/FeatureServer/0/query"

usace_code = "CESWG"        # USACE district code: Galveston District (for now)
search_date = '2018-01-01'  # earliest survey start date to request (2018 to present)
s2_cloud_cov = 1            # Sentinel-2 cloud-cover limit in percent; lower means more visible surface
QUERY_TIME_DELAY = 5        # seconds to wait between requests when fetching all features


In [None]:
# Parameters for the initial (paged) query
params = {
    'where': f"surveydatestart >= '{search_date}' AND usacedistrictcode='{usace_code}'",
    'outFields': '*',  # Retrieve all fields
    'resultRecordCount': 2000,  # Records requested per page
    'resultOffset': 0,  # Starting offset
    'f': 'json',  # Output format
    'outSR': '4326',  # Spatial reference
}

all_features = []

# Request page after page until one comes back empty or a request fails
while True:
    response = requests.get(URL, params=params)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        break

    data = response.json()
    if 'error' in data:
        # The service can report a failed query inside an HTTP 200 body.
        print(f"Error: {data['error']}")
        break

    features = data.get('features', [])
    if not features:
        break
    all_features.extend(features)
    print(f"Retrieved {len(features)} features.")

    # Advance by the number of records actually returned: if the server caps
    # the page below 'resultRecordCount', stepping by the requested size
    # would skip the records in between.
    params['resultOffset'] += len(features)
    time.sleep(1)  # Delay of 1 second between pages

In [None]:
# Small sample query: at most 50 records, no paging offset
params = dict(
    where=f"surveydatestart >= '{search_date}' AND usacedistrictcode='{usace_code}'",
    outFields='*',          # every field
    resultRecordCount=50,   # cap on returned records
    f='json',               # response format
    outSR='4326',           # spatial reference of returned geometries
)

response = requests.get(URL, params=params)

# Anything but HTTP 200 is reported; `features` is only (re)assigned on success
if response.status_code != 200:
    print(f"Error: {response.status_code}, {response.text}")
else:
    data = response.json()
    features = data.get('features', [])
    print(f"Retrieved {len(features)} features.")

In [None]:
# Optional: keep a JSON copy of the downloaded features for later sessions
import json

with open("all_features.json", "w") as f:
    f.write(json.dumps(all_features))

1. Extract date and aoi from the surveys for GEE
- plan is to iterate through the queries (probably by district code) and check to see if GEE has a corresponding Sentinel-2 image

In [None]:
# Per survey job id, keep the survey polygon and the date window to search in GEE
geeinfo = {}
dates = []
for feature in features:
    attrs = feature['attributes']
    start_ms = attrs['surveydatestart']

    # Human-readable survey start date
    dates.append(ehydro_date_convert(start_ms))

    # The polygon is built from the first ring of the feature geometry only
    area = ee.Geometry.Polygon(feature['geometry']['rings'][0])
    date_tuple = get_gee_search_dates(start_ms)

    geeinfo[attrs['surveyjobidpk']] = [area, date_tuple]
surveykeys = [*geeinfo]

2. Iterate through responses and check if GEE has corresponding image(s)
- if not, the response will be deleted

In [None]:
# Look up Sentinel-2 imagery for every survey; a collection is stored as an
# extra list entry only when it contains at least one image
for survey, items in geeinfo.items():
    aoi, dates = items[0], items[1]
    coll = get_sentinel_imagery(aoi, dates[0], dates[1], s2_cloud_cov)

    if coll.size().getInfo() <= 0:
        continue
    items.append(coll)

In [None]:
# A survey entry with more than two items has had an image collection attached
goodsurveys = [survey for survey, items in geeinfo.items() if len(items) > 2]

len(goodsurveys)

3. Extract and download the eHydro bathy data locally

In [None]:
# Map each survey job id to the location of its source bathymetry data.
# The id is read from the feature itself instead of pairing features with a
# key list by position: when two features share an id the key list is shorter
# than the feature list, so positional pairing mislabels or overruns.
bathyinfo = {
    feature['attributes']['surveyjobidpk']: feature['attributes']['sourcedatalocation']
    for feature in features
}

# Get EE imagery for each survey
- Sentinel-2 Surface Reflectance product, test multiple individual bands (red, blue, NIR may be best)
- At some point, if the S2 results are good, try Planet 3m-daily imagery

In [None]:
# Preview the imagery of the third survey in `goodsurveys` on an interactive map
Map = geemap.Map()

preview_entry = geeinfo[goodsurveys[2]]
preview_aoi = preview_entry[0]     # survey polygon
preview_coll = preview_entry[-1]   # image collection stored last in the entry
Map.centerObject(preview_aoi, 15)

# One RGB layer per image in the collection
n_images = preview_coll.size()
s2_images = preview_coll.toList(n_images)
for i in range(n_images.getInfo()):
    image = ee.Image(s2_images.get(i))
    add_rgb_to_map(image, Map, i)

# Survey polygon on top of the imagery
Map.addLayer(preview_aoi)

# Display the map.
Map.addLayerControl(position='topright')
Map

# Export the Sentinel-2 imagery from GEE for download locally

In [None]:
def export_image_to_drive(image, index, region=None):
    """Start an Earth Engine task that exports `image` to Drive as a GeoTIFF.

    Args:
        image: image to export.
        index: value appended to 's2_10m_' to form the task description; it
            should be unique per export.
        region: geometry to export. Defaults to the image's own geometry, so
            the exported area always belongs to the exported image instead of
            depending on whatever a module-level `aoi` variable last held.

    Returns:
        None. The task is started and a progress message is printed.
    """
    if region is None:
        region = image.geometry()

    # Task description, made unique through `index`
    description = f's2_10m_{index}'

    # Setup the export task
    task = ee.batch.Export.image.toDrive(
        image=image,
        description=description,
        region=region,
        fileFormat='GeoTIFF',
        scale=10,
    )
    task.start()
    print(f'Exporting {description} to Drive...')

def export_all_images(collection, date_list):
    """Export one image of `collection` per entry of `date_list`.

    The image at position i of the collection is exported together with the
    first ten characters of `date_list[i]`, which become the export index.

    Args:
        collection: image collection whose images are exported.
        date_list: sequence of date strings; its length decides how many
            images are exported.

    Returns:
        None.
    """
    # Server-side list view of the collection so images can be picked by position
    images = collection.toList(collection.size())

    for position, stamp in enumerate(date_list):
        picked = ee.Image(images.get(position))
        label = stamp[:10]
        export_image_to_drive(picked, label)

## function to get the date of each image in the image collection
def get_date(image):
    """Return a geometry-less feature holding the image date as 'YYYY-MM-dd'."""
    formatted = image.date().format('YYYY-MM-dd')
    properties = {'date': formatted}
    return ee.Feature(None, properties)

In [None]:
# Export every image stored for one survey, labelling each export with its date
survey_id = 'BR_02_JEC_20210630_AD_01'
survey_collection = geeinfo[survey_id][-1]

s2_date_list = survey_collection.map(get_date).aggregate_array('date').getInfo()
export_all_images(survey_collection, s2_date_list)