# This notebook will take in the extent of the baseline interferogram and retrieve temporally registered S1 GRD images and cloud-masked S2 SR images. The S1 images will be used to create the indices listed below at the same resolution as the interferogram, while the S2 images will be used to create NDVI and NDWI images at 10-meter resolution. Otsu thresholding will be used for epoch-wise water masking, resulting in a final water mask at the same resolution as the interferogram. This mask will be used as the water mask for interferometry, and the epoch-wise masks will serve as time-series data for understanding the horizontal redistribution of sediments.

- VH/VV AND VV/VH
- RVI == (4 * VH) / (VH+VV)
- SDWI == np.log(10 * VV * VH)

In [15]:
import os
from osgeo import gdal
import rasterio
import ee
import geemap
import numpy as np
from pyproj import Transformer
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
from osgeo import gdal

In [4]:
# Initialize the Earth Engine client library against the named Cloud project.
ee.Initialize(project= 'ee-claycaldgsl')

# Functions

In [5]:

def get_s2_sr_cld_col(aoi, start_date, end_date, cloud_filter):
    """Pair Sentinel-2 SR images with their s2cloudless probability images.

    Args:
        aoi: Geometry used both for the spatial filter and to clip each image.
        start_date: Start of the date filter passed to ``filterDate``.
        end_date: End of the date filter passed to ``filterDate``.
        cloud_filter: Upper bound (inclusive) on the image property
            ``CLOUDY_PIXEL_PERCENTAGE``.

    Returns:
        ee.ImageCollection of SR images clipped to ``aoi``; each image has the
        first cloud-probability image with the same ``system:index`` stored in
        its ``s2cloudless`` property.
    """

    def _within_search_window(dataset_id):
        # Both collections share the same spatial and temporal filter.
        return (ee.ImageCollection(dataset_id)
                .filterBounds(aoi)
                .filterDate(start_date, end_date))

    # Surface reflectance, additionally limited by scene-level cloudiness.
    surface_refl = _within_search_window('COPERNICUS/S2_SR_HARMONIZED').filter(
        ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', cloud_filter))

    # s2cloudless cloud probability images.
    cloud_prob = _within_search_window('COPERNICUS/S2_CLOUD_PROBABILITY')

    # Match the two collections on their 'system:index' property.
    same_index = ee.Filter.equals(
        leftField='system:index',
        rightField='system:index',
    )
    joined = ee.Join.saveFirst('s2cloudless').apply(
        primary=surface_refl,
        secondary=cloud_prob,
        condition=same_index,
    )

    return ee.ImageCollection(joined).map(lambda img: img.clip(aoi))

def add_cloud_bands(img):
    """Attach the cloud probability and a thresholded cloud flag to ``img``.

    Reads the image stored in the ``s2cloudless`` property of ``img`` and the
    module-level ``CLD_PRB_THRESH`` (not defined in this function; it must be
    set before this is called).

    Returns:
        ``img`` with two extra bands: ``probability`` and ``clouds``
        (1 where probability is greater than ``CLD_PRB_THRESH``).
    """
    probability = ee.Image(img.get('s2cloudless')).select('probability')

    # Pixels above the probability threshold are flagged as cloud.
    cloud_flag = probability.gt(CLD_PRB_THRESH).rename('clouds')

    extra_bands = ee.Image([probability, cloud_flag])
    return img.addBands(extra_bands)

def add_shadow_bands(img):
    """Attach dark-pixel, cloud-projection and shadow bands to ``img``.

    Expects ``img`` to already carry a ``clouds`` band, and reads the
    module-level ``NIR_DRK_THRESH`` and ``CLD_PRJ_DIST`` (not defined in this
    function).

    Returns:
        ``img`` with three extra bands: ``dark_pixels``, ``cloud_transform``
        and ``shadows``.
    """
    SR_BAND_SCALE = 1e4
    nir_cutoff = NIR_DRK_THRESH*SR_BAND_SCALE

    # SCL value 6 is treated as water; everything else is "not water".
    non_water = img.select('SCL').neq(6)

    # Dark NIR pixels that are not water are potential cloud shadow.
    dark_pixels = (img.select('B8')
                   .lt(nir_cutoff)
                   .multiply(non_water)
                   .rename('dark_pixels'))

    # Direction in which shadows are projected from clouds (assumes UTM projection).
    solar_azimuth = ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    shadow_azimuth = ee.Number(90).subtract(solar_azimuth)

    # Project the cloud band along that direction for CLD_PRJ_DIST*10 and
    # keep only the footprint (mask) of the resulting distance band.
    native_projection = img.select(0).projection()
    projected = img.select('clouds').directionalDistanceTransform(
        shadow_azimuth, CLD_PRJ_DIST*10)
    projected = projected.reproject(crs=native_projection, scale=100)
    cld_proj = projected.select('distance').mask().rename('cloud_transform')

    # Shadows are dark pixels that fall inside the projected cloud footprint.
    shadows = cld_proj.multiply(dark_pixels).rename('shadows')

    return img.addBands(ee.Image([dark_pixels, cld_proj, shadows]))

def add_cld_shdw_mask(img):
    """Add a combined cloud + shadow band named ``cloudmask`` to ``img``.

    Uses ``add_cloud_bands`` and ``add_shadow_bands`` to derive the component
    bands, and the module-level ``BUFFER`` (not defined in this function) for
    the dilation radius.

    Returns:
        The original ``img`` with one extra band, ``cloudmask`` (1 for cloud
        or shadow, else 0). The intermediate component bands are not kept.
    """
    with_components = add_shadow_bands(add_cloud_bands(img))

    # 1 wherever either the cloud flag or the shadow flag is set.
    flagged = (with_components.select('clouds')
               .add(with_components.select('shadows'))
               .gt(0))

    # Erode to drop small patches, then dilate what is left by the BUFFER input.
    # 20 m scale is for speed, and assumes clouds don't require 10 m precision.
    eroded = flagged.focalMin(2)
    dilated = eroded.focalMax(BUFFER*2/20)
    cloudmask = (dilated
                 .reproject(crs=img.select([0]).projection(), scale=20)
                 .rename('cloudmask'))

    return img.addBands(cloudmask)

def apply_cld_shdw_mask(img):
    """Mask cloud and shadow pixels out of the reflectance bands of ``img``.

    Expects ``img`` to carry a ``cloudmask`` band.

    Returns:
        The bands of ``img`` matching ``'B.*'``, with their mask updated so
        that pixels where ``cloudmask`` is non-zero are masked out.
    """
    # Invert the mask: clear pixels become 1, cloud/shadow pixels become 0.
    clear_pixels = img.select('cloudmask').Not()

    reflectance = img.select('B.*')
    return reflectance.updateMask(clear_pixels)

def export_image_to_drive(image, description, aoi):
    """Start a Google Drive export of the B2, B3, B4 and B8 bands of ``image``.

    Args:
        image: ee.Image to export; must contain bands B2, B3, B4 and B8.
        description: str, unique description for the export task.
        aoi: ee.Geometry giving the export region.

    The task is started but not awaited; a progress line is printed.
    """
    export_bands = ['B2', 'B3', 'B4', 'B8']

    export_args = {
        'image': image.select(export_bands),
        'description': description,
        'region': aoi,
        'fileFormat': 'GeoTIFF',
        'scale': 10,  # export pixel size; adjust as needed
    }

    task = ee.batch.Export.image.toDrive(**export_args)
    task.start()
    print(f'Exporting {description} to Drive...')

In [6]:
def visualize_raster(path):
    """Show band 1 of the raster at ``path`` with a colorbar.

    Args:
        path: Path to a raster readable by rasterio.

    The axes are labelled with the raster's bounds; the title and colorbar
    label are fixed to the bathymetry use case.
    """
    with rasterio.open(path) as dataset:
        band = dataset.read(1)
        bounds = dataset.bounds

    # NOTE(review): origin="lower" draws array row 0 at the bottom of the plot.
    # For a north-up raster (row 0 = northern edge) this would show the image
    # flipped vertically -- confirm the orientation of the input rasters.
    plt.imshow(
        band,
        extent=(bounds.left, bounds.right, bounds.bottom, bounds.top),
        origin="lower",
        cmap="viridis",
    )
    plt.colorbar(label="Depth (Feet)")
    plt.title("Rasterized Bathymetry")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()

def add_rgb_to_map(image, map_object):
    """Add ``image`` to ``map_object`` as a true-color layer named 'rgb'.

    Args:
        image: Image with bands B4, B3 and B2.
        map_object: Object exposing ``addLayer(image, vis_params, name)``.
    """
    # date = ee.Date(image.get('date')).format('YYYY-MM-dd').getInfo()
    true_color_vis = {'min': 0, 'max': 2000, 'bands': ['B4', 'B3', 'B2']}
    layer_name = 'rgb'
    map_object.addLayer(image, true_color_vis, layer_name)
    
def extract_date(filepath):
    """Return a +/- 1 day search window around the date embedded in ``filepath``.

    The first run of eight consecutive digits in ``filepath`` is parsed as
    ``YYYYMMDD`` (the date convention of the eHydro data this was written for).

    Args:
        filepath: Path or file name containing a ``YYYYMMDD`` date.

    Returns:
        Tuple ``(day_before, day_after)`` of strings formatted ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``filepath`` contains no eight-digit run, or the digits
            found do not form a valid calendar date.
    """
    match = re.search(r'\d{8}', filepath)
    if match is None:
        # Without this check the failure is an opaque AttributeError on None.
        raise ValueError(f'No YYYYMMDD date found in {filepath!r}')

    date = datetime.strptime(match.group(), '%Y%m%d')
    one_day = timedelta(days=1)
    out_format = '%Y-%m-%d'
    return (date - one_day).strftime(out_format), (date + one_day).strftime(out_format)

def extract_valid_bounds_to_epsg4326(raster_path):
    """Return the EPSG:4326 bounding box of the non-NaN pixels of a raster.

    Only band 1 is inspected, and a pixel counts as valid when it is not NaN
    (nodata values other than NaN are not treated as invalid).

    Args:
        raster_path: Path to a raster readable by rasterio.

    Returns:
        ee.Geometry.BBox(west, south, east, north) enclosing the full footprint
        of every valid pixel.

    Raises:
        ValueError: If band 1 contains no non-NaN pixel.
    """
    with rasterio.open(raster_path) as src:
        data = src.read(1)  # Assuming a single band
        transform = src.transform
        src_crs = src.crs

    valid_rows, valid_cols = np.where(~np.isnan(data))
    if valid_rows.size == 0:
        raise ValueError("No valid data in the raster.")

    min_row, max_row = valid_rows.min(), valid_rows.max()
    min_col, max_col = valid_cols.min(), valid_cols.max()

    # Outer corners of the valid pixel window. Using the matching pixel corner
    # for each (ul/ur/ll/lr) includes the full extent of the last row and
    # column, and makes no assumption about which way the y axis runs.
    pixel_corners = (
        (min_row, min_col, "ul"),
        (min_row, max_col, "ur"),
        (max_row, min_col, "ll"),
        (max_row, max_col, "lr"),
    )
    corner_xs, corner_ys = [], []
    for row, col, corner in pixel_corners:
        x, y = rasterio.transform.xy(transform, row, col, offset=corner)
        corner_xs.append(float(x))
        corner_ys.append(float(y))

    # Reproject all four corners: a rectangle in the source CRS is generally
    # not axis-aligned in lon/lat, so two opposite corners are not enough.
    transformer = Transformer.from_crs(src_crs, "EPSG:4326", always_xy=True)
    lons, lats = transformer.transform(corner_xs, corner_ys)

    west, east = float(min(lons)), float(max(lons))
    south, north = float(min(lats)), float(max(lats))

    return ee.Geometry.BBox(west, south, east, north)

# 1. Read in baseline wrapped interferogram to extract bbox for GEE image search

In [7]:
# Root folder of the Sabine S1 project; the interferometry paths below are built from it.
work_dir = '/Volumes/SamsungExt/WetlandsRS/Sabine_S1'

In [14]:
# Locate the baseline wrapped interferogram (VRT) under the interferometry folder.
insar_dir = os.path.join(work_dir, 'interferometry')
wrap_ifg = os.path.join(insar_dir, 'work/merged/filt_topophase.flat.geo.vrt')

# Echo the resolved path in the notebook output.
display(wrap_ifg)

'/Volumes/SamsungExt/WetlandsRS/Sabine_S1/interferometry/work/merged/filt_topophase.flat.geo.vrt'

In [16]:
# Open the wrapped interferogram and print its dataset-level metadata.
dataset = gdal.Open(wrap_ifg)
if dataset is None:
    # With GDAL exceptions disabled, gdal.Open returns None instead of raising.
    raise RuntimeError(f'GDAL could not open {wrap_ifg}')
print(dataset.GetMetadata())

{}


# Retrieve available Sentinel-2 images from GEE

# 1. Clip the Sentinel-1 bands to the same extent as the Sentinel-2 imagery 
- need to clip using the bbox from first image
- need to stack all GRD and GLCM bands into a single .tif or .vrt for respective dates
- stack/include the four label raster bands as well

In [None]:
# Project root for the GRD/GLCM processing cells below.
# Exactly one of the platform-specific assignments should be active.

############### WSL #########################
# work_dir = '/mnt/d/SabineRS'

############### linux #########################
work_dir = '/home/wcc/Desktop/SabineRS/'

In [None]:
# set the directory for where your images are located
# orig_ims / orig_glcms are indexed in parallel further down, so both are
# expected to list the same scenes in the same order.
# NOTE(review): get_grd and get_glcm are not defined in this notebook section;
# presumably they return lists of file paths -- confirm against their source.

############### WSL #########################
# orig_ims = get_grd('/mnt/d/SabineRS/GRD/0_initial/backscatter')
# orig_glcms = get_glcm('/mnt/d/SabineRS/GRD/0_initial/glcm')

############### Linux #########################
orig_ims = get_grd('/home/wcc/Desktop/SabineRS/GRD/0_initial/backscatter')
orig_glcms = get_glcm('/home/wcc/Desktop/SabineRS/GRD/0_initial/glcm')

In [None]:
# Folder holding the registered multispectral (RGB/NIR/SWIR1) GeoTIFFs.

############### WSL #########################
# regrgbpath = '/mnt/d/SabineRS/MSI/RGB_NIR_SWIR1/registered'


############### Linux #######################
regrgbpath = '/home/wcc/Desktop/SabineRS/MSI/RGB_NIR_SWIR1/registered'


# Collect every .tif in the folder, ordered by the YYYY-MM-DD date that
# occupies the 10 characters before the '.tif' extension.
reg_rgb_ims = sorted(
    (os.path.join(regrgbpath, name)
     for name in os.listdir(regrgbpath)
     if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

In [None]:
# Record the EPSG of the first registered S2 image and of the first original
# GRD image.
# NOTE(review): neither value is used in the visible cells; presumably they are
# meant to confirm both datasets share a CRS before clipping -- confirm.
label_epsg = get_EPSG(reg_rgb_ims[0])
orig_epsg = get_EPSG(orig_ims[0])

In [None]:
# Derive the [minx, miny, maxx, maxy] extent of the first registered S2 image
# from its geotransform.
src = gdal.Open(reg_rgb_ims[0])
if src is None:
    # With GDAL exceptions disabled, gdal.Open returns None instead of raising.
    raise RuntimeError(f'GDAL could not open {reg_rgb_ims[0]}')
geo_transform = src.GetGeoTransform()

# Only the origin and pixel-size terms are used; the rotation terms
# (geo_transform[2] and geo_transform[4]) are ignored, i.e. the image is
# treated as axis-aligned.
origin_x, pixel_width = geo_transform[0], geo_transform[1]
origin_y, pixel_height = geo_transform[3], geo_transform[5]

# s2_bounds= box(s2_bounds.left, s2_bounds.bottom, s2_bounds.right, s2_bounds.top) #(minx, miny, maxx, maxy)
coords = [
    origin_x,
    origin_x + (src.RasterXSize * pixel_width),
    origin_y + (src.RasterYSize * pixel_height),
    origin_y,
]
bbox = [coords[0], coords[2], coords[1], coords[3]]
bbox

In [None]:
# Clip every original backscatter image and its matching GLCM image to bbox.
# The last 17 characters of a backscatter path are reused as the output file
# name; the last 22 characters of a GLCM path are appended under GRD/1_clipped.
for idx, grd_path in enumerate(orig_ims):
    grd_out = os.path.join(work_dir, f'GRD/1_clipped/backscatter/{grd_path[-17:]}')
    clip_raster_by_bbox(grd_path, grd_out, bbox)

    glcm_path = orig_glcms[idx]
    glcm_out = os.path.join(work_dir, f'GRD/1_clipped/{glcm_path[-22:]}')
    clip_raster_by_bbox(glcm_path, glcm_out, bbox)

In [None]:
# Visual spot-check of one clipped backscatter image against bbox.
# The active call indexes orig_ims[120], so orig_ims must hold at least 121 paths.

# i = 0

# plot_vv_vh_with_bbox(orig_ims[i], bbox) # before clip

############### WSL #########################
# plot_vv_vh_with_bbox(f'/mnt/d/SabineRS/GRD/1_clipped/backscatter/{orig_ims[5][-17:]}', bbox) # after clip

############### Linux #######################
plot_vv_vh_with_bbox(f'/home/wcc/Desktop/SabineRS/GRD/1_clipped/backscatter/{orig_ims[120][-17:]}', bbox) # after clip

# Register Imagery

In [None]:
# get the reprojected and clipped data
clip_orig = get_grd(os.path.join(work_dir, f'GRD/1_clipped/backscatter'))
clip_orig_glcms = get_glcm(os.path.join(work_dir, f'GRD/1_clipped/glcm'))

clip_orig = sorted(clip_orig, key=lambda x: datetime.strptime(x[-14:-4], '%Y-%m-%d'))
clip_orig_glcms = sorted(clip_orig_glcms, key=lambda x: datetime.strptime(x[-14:-4], '%Y-%m-%d'))

Having problems with PCA registration. I think this is because my GRD and GLCM images have a smaller bbox than my S2 classifications. I will retrieve images with a larger bbox and retry.

In [None]:
# get PCA for SAR imagery based on VV and VH bands only (no GLCM)

# One PCA output per clipped image, named after the last 14 characters of the
# input path.
for grd_path in clip_orig:
    pca_out = os.path.join(work_dir, f'GRD/1_clipped/pca/{grd_path[-14:]}')
    perform_pca(grd_path, pca_out)


# Re-list the PCA folder, ordered by the date embedded in each file name.
pcapath = os.path.join(work_dir,'GRD/1_clipped/pca')
pca_ims = sorted(
    (os.path.join(pcapath, name)
     for name in os.listdir(pcapath)
     if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

In [None]:
# Register every SAR scene to the averaged S2 PCA reference: estimate a warp
# from band 1 of the scene's PCA image, then apply it to all backscatter and
# GLCM bands and save the results under GRD/2_registered.
ref_ds = gdal.Open(os.path.join(work_dir, 'MSI/RGB_NIR_SWIR1/rgb_average_pca.tif'))
ref_im = ref_ds.GetRasterBand(1).ReadAsArray()


def _read_all_bands(ds):
    # GDAL band numbers are 1-based.
    return [ds.GetRasterBand(band_no).ReadAsArray() for band_no in range(1, ds.RasterCount + 1)]


# NOTE(review): pca_ims, clip_orig and clip_orig_glcms are paired purely by
# position, so all three must list the same dates in the same order -- confirm
# the pca folder holds no stale files.
for i, im in enumerate(pca_ims):
    pca_ds = gdal.Open(im)
    pca_im = pca_ds.GetRasterBand(1).ReadAsArray()

    warp_matrix = register_images(ref_im, pca_im)
    if warp_matrix is None:
        # No transformation was returned for this scene; nothing is written.
        continue

    orig_dataset = gdal.Open(clip_orig[i])
    orig_bands = _read_all_bands(orig_dataset)

    glcm_ds = gdal.Open(clip_orig_glcms[i])
    glcm_bands = _read_all_bands(glcm_ds)

    transformed_orig = apply_transformation_to_all_bands(orig_bands, warp_matrix, ref_im.shape)
    transformed_glcm = apply_transformation_to_all_bands(glcm_bands, warp_matrix, ref_im.shape)

    backscatter_out = os.path.join(work_dir, f'GRD/2_registered/backscatter/{clip_orig[i][-17:]}')
    glcm_out = os.path.join(work_dir, f'GRD/2_registered/glcm/{clip_orig_glcms[i][-17:]}')
    save_multiband_image_as_tiff(backscatter_out, transformed_orig, orig_dataset)
    save_multiband_image_as_tiff(glcm_out, transformed_glcm, glcm_ds)

# Generate VV/VH ratio bands

In [None]:
# List the registered backscatter and GLCM GeoTIFFs in chronological order.
regpath = os.path.join(work_dir,'GRD/2_registered')


def _dated_tifs(subdir):
    # Every .tif under regpath/subdir, sorted by the YYYY-MM-DD date that
    # precedes the extension in the file name.
    names = os.listdir(os.path.join(regpath, subdir))
    paths = [os.path.join(regpath, f'{subdir}/{name}') for name in names if name.endswith('.tif')]
    return sorted(paths, key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'))


reg_grd_ims = _dated_tifs('backscatter')
reg_glcm_ims = _dated_tifs('glcm')

In [None]:
# vv, vh, sentinel-1 radar veg index, and sentinel-1 dual-pol water index

# sar_ratio(reg_grd_ims, f'/mnt/d/SabineRS/GRD/3_ratio')
ratio_dir = '/home/wcc/Desktop/SabineRS/GRD/3_ratio'
sar_ratio(reg_grd_ims, ratio_dir)

# List the .tif files in the ratio folder, ordered by the date in each file name.
ratio_ims = sorted(
    (os.path.join(ratio_dir, name)
     for name in os.listdir(ratio_dir)
     if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)