# Use this notebook to extract some of the data needed for training the model. It assumes the bathymetry data has already been downloaded with 01_get_data.ipynb and the imagery has been downloaded from Google Drive.

In [183]:
import os
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import numpy as np
import geopandas as gpd
import zipfile
from osgeo import gdal

# Functions

In [181]:
def normalize(array):
    """Min-max scale ``array`` to the range [0, 1], ignoring NaNs.

    The minimum and maximum are taken with ``np.nanmin`` / ``np.nanmax``,
    so NaN entries do not affect the scaling and stay NaN in the result.

    Args:
        array: Array-like of numeric values.

    Returns:
        The scaled values. When every non-NaN value is identical (zero
        range), zeros are returned instead of the NaNs that a 0/0 division
        would produce.
    """
    lowest = np.nanmin(array)
    value_range = np.nanmax(array) - lowest
    shifted = array - lowest
    if value_range == 0:
        # Constant input: dividing by the zero range would give NaN/inf.
        # Dividing by 1.0 keeps the float result type of the normal path.
        return shifted / 1.0
    return shifted / value_range

def interpolate_bathymetry(surveyname, resolution, storage_dir):
    """Rasterize one survey's ``Bathymetry_Vector`` layer to a GeoTIFF.

    The first ``.gdb`` found in ``BATHY_PATH/<surveyname>`` is gridded with
    GDAL's inverse-distance algorithm using the ``depthMean`` attribute
    (multiplied by -1), clipped to the union of the layer geometries, and
    finally warped onto a pixel-aligned grid. The result is written to
    ``<storage_dir>/<surveyname>.tif``. Intermediate rasters live in a
    temporary directory that is removed even if a step fails.

    Args:
        surveyname: Name of the survey folder inside ``BATHY_PATH``.
        resolution: Pixel size of the output raster, in the units of the
            layer's CRS (the original code comment states meters).
        storage_dir: Existing directory that receives the final raster.

    Raises:
        IndexError: If the survey folder contains no ``.gdb`` entry.
        RuntimeError: If GDAL reports a failed grid or warp by returning
            ``None`` (only possible when GDAL exceptions are disabled).
    """
    import tempfile  # function-scope import: only needed for the scratch directory

    folder_path = os.path.join(BATHY_PATH, surveyname)
    input_gdb = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.gdb')][0]
    resampled_raster = os.path.join(storage_dir, f'{surveyname}.tif')
    layer_name = "Bathymetry_Vector"
    z_field = "depthMean"  # Attribute containing bathymetry or depth values

    gdf = gpd.read_file(input_gdb, layer=layer_name)
    xmin, ymin, xmax, ymax = gdf.total_bounds  # Extent of the layer

    # Raster size for the requested pixel size; never let it collapse to 0
    # when the extent is smaller than half a pixel.
    width = max(1, round((xmax - xmin) / resolution))
    height = max(1, round((ymax - ymin) / resolution))

    # A private scratch directory avoids collisions between runs and is
    # cleaned up automatically, including when a step raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        output_raster = os.path.join(scratch_dir, "bathy_interp.tif")
        clipped_raster = os.path.join(scratch_dir, "bath_clip.tif")

        # --- Interpolate the vector layer onto a regular grid ---
        grid_ds = gdal.Grid(
            output_raster,                # Output raster path
            input_gdb,                    # Input vector data
            format="GTiff",               # Output file format
            algorithm="invdist",          # Interpolation method (IDW)
            zfield=z_field,               # Attribute containing bathymetry values
            outputBounds=[xmin, ymin, xmax, ymax],  # Set bounds
            width=width,                  # Number of columns
            height=height,                # Number of rows
            layers=layer_name,            # Specify the layer
            z_multiply=-1                 # Flip depths to negative
        )
        if grid_ds is None:
            raise RuntimeError(f"gdal.Grid failed for survey '{surveyname}'")
        grid_ds = None  # Dropping the reference closes and flushes the file

        # --- Clip the raster to the layer geometry ---
        # Combine all geometries into a single boundary
        geometry = [gdf.geometry.union_all()]

        with rasterio.open(output_raster) as src:
            clipped_image, clipped_transform = mask(
                src, geometry, crop=True, nodata=np.nan
            )
            clipped_meta = src.meta.copy()
            clipped_meta.update({
                "driver": "GTiff",
                "height": clipped_image.shape[1],
                "width": clipped_image.shape[2],
                "transform": clipped_transform,
                "nodata": np.nan
            })

        # Save the clipped raster to a new file
        with rasterio.open(clipped_raster, "w", **clipped_meta) as dst:
            dst.write(clipped_image[0], 1)  # Access first band

        # --- Resample the clipped raster onto the aligned output grid ---
        warp_ds = gdal.Warp(
            resampled_raster,              # Output resampled raster path
            clipped_raster,                # Input clipped raster
            xRes=resolution,               # Set pixel size in x direction
            yRes=resolution,               # Set pixel size in y direction
            resampleAlg=gdal.GRA_Bilinear, # Bilinear interpolation for resampling
            targetAlignedPixels=True,      # Align pixels to the grid
            dstNodata=np.nan               # Set NoData value to NaN
        )
        if warp_ds is None:
            raise RuntimeError(f"gdal.Warp failed for survey '{surveyname}'")
        warp_ds = None  # Close the output so it is fully written to disk

    print(f"Resampled raster saved to: {resampled_raster}")

def reproject_sentinel2(bathy_raster, s2_raster, reprojected_s2):
    """Reproject a Sentinel-2 raster into the CRS of a bathymetry raster.

    Args:
        bathy_raster: Path of the raster whose CRS is the target CRS.
        s2_raster: Path of the Sentinel-2 raster to reproject.
        reprojected_s2: Path the reprojected raster is written to.
    """
    # Only the CRS of the bathymetry raster is needed
    with rasterio.open(bathy_raster) as bathy_ds:
        target_crs = bathy_ds.crs

    with rasterio.open(s2_raster) as src:
        # Default output grid (transform and size) in the target CRS
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds
        )

        # Source metadata with the georeferencing swapped for the new grid
        out_meta = dict(
            src.meta,
            crs=target_crs,
            transform=dst_transform,
            width=dst_width,
            height=dst_height,
        )

        # Warp every band into the output file
        with rasterio.open(reprojected_s2, "w", **out_meta) as dst:
            for band_index in src.indexes:
                reproject(
                    source=rasterio.band(src, band_index),
                    destination=rasterio.band(dst, band_index),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=dst_transform,
                    dst_crs=target_crs,
                    resampling=Resampling.cubic_spline,  # Cubic spline interpolation
                )

    print(f"Reprojected Sentinel-2 raster saved to: {reprojected_s2}")

# Establish working directories

In [184]:
# Folder with the Sentinel-2 GeoTIFFs; the reprojection cell reads '<survey>.tif' from it
s2_path = '/mnt/d/eHydro/tiffs'
# Folder with the downloaded survey archives and their extracted folders
BATHY_PATH = '/mnt/d/eHydro/bathy'

In [185]:
# Unpack every downloaded survey archive into a folder named after it
zipnames = [f[:-len('.zip')] for f in os.listdir(BATHY_PATH) if f.endswith('.zip')]
for name in zipnames:
    zipfile_path = os.path.join(BATHY_PATH, f'{name}.zip')
    with zipfile.ZipFile(zipfile_path, 'r') as zip_ref:
        zip_ref.extractall(os.path.join(BATHY_PATH, name))
    # Delete the archive only after its handle is closed; removing an open
    # file fails on Windows.
    os.remove(zipfile_path)

# Survey names are the folders in BATHY_PATH. Plain files are skipped because
# later cells call os.listdir() on every survey entry.
surveynames = sorted(
    f for f in os.listdir(BATHY_PATH)
    if os.path.isdir(os.path.join(BATHY_PATH, f))
)

# Extract the needed data for bathymetry rasters from the downloaded .zip files

In [26]:
# Read the two bathymetry layers from the first .gdb of every survey folder
bathyinfo = {}
for name in surveynames:
    folder_path = os.path.join(BATHY_PATH, name)
    gdb_entries = [f for f in os.listdir(folder_path) if f.endswith('.gdb')]
    gdb_file = os.path.join(folder_path, gdb_entries[0])

    # Same read order as before: polygons first, then contour lines
    bathyvector, contours = (
        gpd.read_file(gdb_file, layer=layer)
        for layer in ('Bathymetry_Vector', "ElevationContour_ALL")
    )

    bathyinfo[name] = [bathyvector, contours]


# Generate 10m pixel resolution bathymetry rasters from the .gdb files
Each .zip file comes with required and optional files. One of the required files is the .gdb file, which contains the needed bathymetry and geospatial extents. Two layers within the .gdb can be used to get the bathymetry: Bathymetry_Vector or ElevationContour_ALL. Bathymetry_Vector is a layer of polygons, each assigned a mean depth. ElevationContour_ALL is a layer of multipart lines, with each line denoting an elevation. The bathymetry rasters here are generated from the Bathymetry_Vector multipolygon layer.

In [None]:
# Destination folder for the generated bathymetry rasters
storage_dir = '/mnt/d/eHydro/bathy_rasters'
os.makedirs(storage_dir, exist_ok=True)

# One raster per survey loaded into bathyinfo, at a 10 pixel resolution
for key in bathyinfo:
    interpolate_bathymetry(key, 10, storage_dir)

# Reproject GEE Sentinel-2 products from EPSG:4326 to the eHydro CRS

In [None]:
# Input and output folders for the reprojection step
bathy_raster_dir = '/mnt/d/eHydro/bathy_rasters'
reprojected_dir = '/mnt/d/eHydro/s2_rasters'
# The output folder is not created anywhere else; writing into a missing
# folder would fail on the first survey.
os.makedirs(reprojected_dir, exist_ok=True)

for name in surveynames:
    bathy_raster = os.path.join(bathy_raster_dir, f'{name}.tif')
    s2_raster = os.path.join(s2_path, f'{name}.tif')
    reprojected_s2 = os.path.join(reprojected_dir, f'{name}.tif')

    reproject_sentinel2(bathy_raster, s2_raster, reprojected_s2)