# Use this notebook to extract some of the data needed for training the model. It assumes the bathymetry data has already been downloaded with 01_get_data.ipynb and the imagery has been downloaded from Google Drive.

In [10]:
import os
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import numpy as np

# Functions

In [62]:
def normalize(array):
    """Min-max scale ``array`` to the range [0, 1], ignoring NaNs.

    Parameters
    ----------
    array : array-like
        Numeric values. NaN entries are skipped when locating the minimum and
        maximum and remain NaN in the result.

    Returns
    -------
    Same container type as ``array - scalar`` produces
        ``(array - min) / (max - min)``. When all non-NaN values are equal
        (``max == min``) the non-NaN entries are returned as 0.0 instead of
        the NaN that ``0 / 0`` would produce.
    """
    lowest = np.nanmin(array)
    span = np.nanmax(array) - lowest
    # A constant input has zero range; dividing by 1 keeps the shifted zeros
    # (and any NaNs) rather than emitting 0 / 0 = NaN with a RuntimeWarning.
    return (array - lowest) / (span if span != 0 else 1)

def reproject_sentinel2(bathy_raster, s2_raster, reprojected_s2):
    """Reproject a Sentinel-2 raster into the CRS of a bathymetry raster.

    Parameters
    ----------
    bathy_raster : path
        Bathymetry raster; only its CRS is read.
    s2_raster : path
        Sentinel-2 raster to reproject (every band is processed).
    reprojected_s2 : path
        Destination file for the reprojected Sentinel-2 raster.

    The output grid (transform, width, height) comes from
    ``calculate_default_transform`` and each band is resampled with cubic
    spline interpolation. A confirmation message is printed when done.
    """
    # The bathymetry raster only supplies the target CRS.
    with rasterio.open(bathy_raster) as crs_source:
        target_crs = crs_source.crs

    with rasterio.open(s2_raster) as src:
        # Work out the default output grid for the target CRS.
        left, bottom, right, top = src.bounds
        out_transform, out_width, out_height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height,
            left=left, bottom=bottom, right=right, top=top,
        )

        # Source metadata with the georeferencing swapped for the new grid.
        out_meta = {
            **src.meta,
            "crs": target_crs,
            "transform": out_transform,
            "width": out_width,
            "height": out_height,
        }

        # Warp band by band straight into the output file.
        with rasterio.open(reprojected_s2, "w", **out_meta) as dst:
            for band_index in src.indexes:
                reproject(
                    source=rasterio.band(src, band_index),
                    destination=rasterio.band(dst, band_index),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=out_transform,
                    dst_crs=target_crs,
                    resampling=Resampling.cubic_spline,
                )

    print(f"Reprojected Sentinel-2 raster saved to: {reprojected_s2}")

def resample_bathy_to_sentinel2(bathy_raster, sentinel_raster, resampled_bathy):
    """Resample band 1 of a bathymetry raster onto a Sentinel-2 raster's grid.

    Parameters
    ----------
    bathy_raster : path
        Source bathymetry raster (band 1 is used).
    sentinel_raster : path
        Raster whose CRS, transform, width and height define the target grid.
    resampled_bathy : path
        Destination file for the resampled bathymetry.

    Bilinear resampling is used and the output keeps the bathymetry raster's
    band-1 data type. A confirmation message is printed when done.
    """
    # Capture the target grid definition from the Sentinel-2 raster.
    with rasterio.open(sentinel_raster) as grid_src:
        grid_transform = grid_src.transform
        grid_crs = grid_src.crs
        n_rows, n_cols = grid_src.height, grid_src.width

    with rasterio.open(bathy_raster) as src:
        bathy_dtype = src.dtypes[0]

        # Output metadata: bathymetry profile re-georeferenced to the target grid.
        out_meta = {
            **src.meta,
            "transform": grid_transform,
            "crs": grid_crs,
            "width": n_cols,
            "height": n_rows,
            "dtype": bathy_dtype,
        }

        # Warp the bathymetry band into an in-memory array shaped like the target grid.
        warped = np.empty((n_rows, n_cols), dtype=bathy_dtype)
        reproject(
            source=rasterio.band(src, 1),
            destination=warped,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=grid_transform,
            dst_crs=grid_crs,
            dst_width=n_cols,
            dst_height=n_rows,
            resampling=Resampling.bilinear,
        )

    # Persist the resampled band.
    with rasterio.open(resampled_bathy, "w", **out_meta) as dst:
        dst.write(warped, 1)

    print(f"Resampled bathymetry raster saved to: {resampled_bathy}")

def clip_sentinel_by_bathy(bathy_raster, sentinel_raster, output_sentinel):
    """Mask Sentinel-2 pixels that lack valid bathymetry or valid imagery.

    A pixel is kept when band 1 of the bathymetry raster is neither NaN nor
    the raster's declared nodata value, and the first Sentinel-2 band is
    non-zero. Everywhere else all Sentinel-2 bands are set to NaN.

    Parameters
    ----------
    bathy_raster : path
        Bathymetry raster already on the Sentinel-2 grid (band 1 is used).
    sentinel_raster : path
        Sentinel-2 raster; all bands are read and masked.
    output_sentinel : path
        Destination file, written as float32 with NaN as nodata.

    Raises
    ------
    ValueError
        If the rasters do not share the same CRS, transform and pixel
        dimensions.
    """
    # Open the bathymetry raster
    with rasterio.open(bathy_raster) as bathy_src:
        bathy_data = bathy_src.read(1)  # Only the first band is used
        bathy_nodata = bathy_src.nodata
        bathy_transform = bathy_src.transform
        bathy_crs = bathy_src.crs

    # Non-NaN bathymetry pixels are valid ...
    valid_bathy_mask = ~np.isnan(bathy_data)
    # ... unless they hold the file's declared (non-NaN) nodata value, which
    # a plain NaN test would otherwise let through as real depths.
    if bathy_nodata is not None and not np.isnan(bathy_nodata):
        valid_bathy_mask &= bathy_data != bathy_nodata

    # Open the Sentinel-2 raster
    with rasterio.open(sentinel_raster) as sentinel_src:
        # Ensure CRS, transform and pixel dimensions match so the masks line up
        if (
            sentinel_src.crs != bathy_crs
            or sentinel_src.transform != bathy_transform
            or (sentinel_src.height, sentinel_src.width) != bathy_data.shape
        ):
            raise ValueError("Sentinel-2 raster must already be aligned with bathymetry raster.")

        sentinel_data = sentinel_src.read()  # Read all bands of Sentinel-2 raster

        # Sentinel-2 valid pixels exclude zeros; the first band is taken as representative
        valid_sentinel_mask = sentinel_data[0, :, :] != 0

        # Combine masks (valid bathymetry AND valid Sentinel-2)
        combined_mask = valid_bathy_mask & valid_sentinel_mask

        # The 2-D mask broadcasts across bands; cast so the array matches the
        # float32 dtype declared in the output metadata.
        clipped_sentinel_data = np.where(combined_mask, sentinel_data, np.nan).astype("float32")

        # Update metadata
        sentinel_meta = sentinel_src.meta.copy()
        sentinel_meta.update({
            "dtype": "float32",
            "nodata": np.nan
        })

    # Save the clipped Sentinel-2 raster
    with rasterio.open(output_sentinel, "w", **sentinel_meta) as dst:
        dst.write(clipped_sentinel_data)

    print(f"Clipped Sentinel-2 raster saved to: {output_sentinel}")

def clip_bathy_by_sentinel(bathy_raster, clipped_sentinel_raster, output_bathy):
    """Mask bathymetry pixels that have no valid clipped Sentinel-2 data.

    Bathymetry (band 1) is kept where the first band of the clipped
    Sentinel-2 raster is not NaN and the bathymetry value is not the raster's
    declared nodata value. All other pixels are set to NaN.

    Parameters
    ----------
    bathy_raster : path
        Bathymetry raster already on the Sentinel-2 grid (band 1 is used).
    clipped_sentinel_raster : path
        Sentinel-2 raster in which invalid pixels are NaN.
    output_bathy : path
        Destination file, written as float32 with NaN as nodata.

    Raises
    ------
    ValueError
        If the rasters do not share the same CRS, transform and pixel
        dimensions.
    """
    # Open the clipped Sentinel-2 raster to create a valid mask
    with rasterio.open(clipped_sentinel_raster) as sentinel_src:
        sentinel_data = sentinel_src.read(1)  # First band is used as the validity reference
        valid_mask = ~np.isnan(sentinel_data)  # Non-NaN pixels are valid
        sentinel_transform = sentinel_src.transform
        sentinel_crs = sentinel_src.crs

    # Open the bathymetry raster
    with rasterio.open(bathy_raster) as bathy_src:
        # Ensure CRS, transform and pixel dimensions match so the mask lines up
        if (
            bathy_src.crs != sentinel_crs
            or bathy_src.transform != sentinel_transform
            or (bathy_src.height, bathy_src.width) != sentinel_data.shape
        ):
            raise ValueError("Bathymetry raster must already be aligned with Sentinel-2 raster.")

        bathy_data = bathy_src.read(1)  # Only the first band is used

        # The output declares NaN as nodata, so pixels holding the input's own
        # (non-NaN) nodata value must become NaN instead of passing as depths.
        bathy_nodata = bathy_src.nodata
        if bathy_nodata is not None and not np.isnan(bathy_nodata):
            valid_mask &= bathy_data != bathy_nodata

        # Mask the bathymetry and match the float32 dtype declared below
        clipped_bathy_data = np.where(valid_mask, bathy_data, np.nan).astype("float32")

        # Update metadata
        bathy_meta = bathy_src.meta.copy()
        bathy_meta.update({
            "dtype": "float32",
            "nodata": np.nan
        })

    # Save the clipped bathymetry raster
    with rasterio.open(output_bathy, "w", **bathy_meta) as dst:
        dst.write(clipped_bathy_data, 1)

    print(f"Clipped bathymetry raster saved to: {output_bathy}")

# Establish working directories

In [18]:
# Raw inputs: Sentinel-2 rasters and bathymetry rasters.
S2_PATH = '/home/clay/Documents/SDB/CESWG/s2_rasters'
BATHY_PATH = '/home/clay/Documents/SDB/CESWG/bathy_rasters'

# Intermediate outputs live next to the raw inputs, in sibling *_proj folders.
S2_PROJ = S2_PATH.replace('s2_rasters', 's2_proj')
BATHY_PROJ = BATHY_PATH.replace('bathy_rasters', 'bathy_proj')

# Final, mutually clipped rasters.
FINAL_PATH = '/home/clay/Documents/SDB/CESWG/processed'
S2_FINAL = os.path.join(FINAL_PATH, 'S2')
BATHY_FINAL = os.path.join(FINAL_PATH, 'Bathy')

# Create every output directory up front; existing directories are left untouched.
for _output_dir in (S2_PROJ, BATHY_PROJ, FINAL_PATH, S2_FINAL, BATHY_FINAL):
    os.makedirs(_output_dir, exist_ok=True)

In [19]:
# Survey names are the bathymetry file names with the '.tif' suffix removed.
surveynames = [fname[:-4] for fname in os.listdir(BATHY_PATH) if fname.endswith('.tif')]

# Map each survey name to [bathymetry raster path, Sentinel-2 raster path].
surveyinfo = {
    survey: [
        os.path.join(BATHY_PATH, f"{survey}.tif"),
        os.path.join(S2_PATH, f"{survey}.tif"),
    ]
    for survey in surveynames
}

# Reproject GEE Sentinel-2 products from EPSG:4326 to the eHydro CRS

In [None]:
# Reproject each survey's Sentinel-2 raster into the CRS of its bathymetry raster.
for name, (bathy_tif, s2_tif) in surveyinfo.items():
    reproject_sentinel2(bathy_tif, s2_tif, os.path.join(S2_PROJ, f"{name}.tif"))

# Resample bathy rasters from 10 ft resolution to the same resolution as the S2 rasters

In [None]:
# Resample each original bathymetry raster onto the grid of its reprojected Sentinel-2 raster.
for name, rasters in surveyinfo.items():
    tif_name = f"{name}.tif"
    resample_bathy_to_sentinel2(
        rasters[0],
        os.path.join(S2_PROJ, tif_name),
        os.path.join(BATHY_PROJ, tif_name),
    )

In [33]:
# New dictionary with rasters of matching CRS and spatial resolution (~10 meters).
# Each entry is [resampled bathymetry path, reprojected Sentinel-2 path].
# The paths are built directly from the output directories instead of by
# substring replacement on the input paths: a non-matching path can no longer
# leave a value carried over from the previous survey (or an undefined name),
# and text inside a file name is never rewritten.
reprojected_rasters = {
    name: [
        os.path.join(BATHY_PROJ, f"{name}.tif"),
        os.path.join(S2_PROJ, f"{name}.tif"),
    ]
    for name in surveyinfo
}

# Clip the S2 rasters by the valid bathymetry pixels
- Need the bounds of the non-NaN (`np.nan`) bathymetry pixels for clipping

In [None]:
# Mask each reprojected Sentinel-2 raster by its resampled bathymetry and write it to the final S2 folder.
for name, (bathy_proj_tif, s2_proj_tif) in reprojected_rasters.items():
    clip_sentinel_by_bathy(bathy_proj_tif, s2_proj_tif, os.path.join(S2_FINAL, f"{name}.tif"))

# Clip the bathymetry rasters by the valid cloud-masked S2 pixels
- Need the bounds of the valid pixels; these appear to be the values above 0.0, since no NaN value is applied in GEE

In [None]:
# Mask each resampled bathymetry raster by the final (clipped) Sentinel-2 raster of the same survey.
for name, rasters in reprojected_rasters.items():
    tif_name = f"{name}.tif"
    clip_bathy_by_sentinel(
        rasters[0],
        os.path.join(S2_FINAL, tif_name),
        os.path.join(BATHY_FINAL, tif_name),
    )

The needed preprocessing should now be done; the next step is training the model on the data.
- Will try traditional ML regression models, as well as a CNN
- May try majority voting of models from multiple training sets, as in Tan et al. 2022

# Use ACOLITE for color corrections

In [None]:
# Add the local ACOLITE clone to the Python path and import acolite.
# The import must come after the sys.path change, so it stays in this cell.
import os
import sys

user_home = os.path.expanduser("~")
acolite_dir = os.path.join(user_home, 'git', 'acolite')
if acolite_dir not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.append(acolite_dir)
import acolite as ac

# EARTHDATA_u and EARTHDATA_p credentials.
# Prefer exporting them in the environment before starting the kernel; fill in
# the placeholders below only for local use and never commit real values.
earthdata_credentials = {'EARTHDATA_u': '', 'EARTHDATA_p': ''}
for env_key, env_value in earthdata_credentials.items():
    # A blank placeholder must not overwrite credentials that are already set.
    if env_value or env_key not in os.environ:
        os.environ[env_key] = env_value

# scenes to process
bundles = ['/path/to/scene1', '/path/to/scene2']
# alternatively use glob
# import glob
# bundles = glob.glob('/path/to/scene*')

# output directory
odir = '/path/to/output/directory'

# optional 4 element limit list [S, W, N, E]
limit = None

# optional file with processing settings
# if set to None defaults will be used
settings_file = None

# run through bundles
for bundle in bundles:
    # Parse a fresh settings object per bundle, since it is modified below.
    settings = ac.acolite.settings.parse(settings_file)
    # set settings provided above
    settings['limit'] = limit
    settings['inputfile'] = bundle
    settings['output'] = odir
    # other settings can also be provided here, e.g.
    # settings['s2_target_res'] = 60
    # settings['dsf_aot_estimate'] = 'fixed'
    # settings['l2w_parameters'] = ['t_nechad', 't_dogliotti']

    # process the current bundle
    ac.acolite.acolite_run(settings=settings)