# This notebook will be used to read in the two .nc files containing the Sentinel-1 Gamma0 backscatter, as well as the composite bands created from the backscatter. Using these .nc files, cloud-masked Sentinel-2 images will be retrieved from the GEE. The NDVI and NDWI will be used at 10-meter resolution. Otsu thresholding will be applied on all 9 bands, where the NDVI and NDWI will serve as refinement given the finer spatial resolution

In [1]:
import netCDF4 as nc
from netCDF4 import Dataset
import os
from osgeo import gdal
import rasterio
import ee
import geemap
import geemap.colormaps as cm
import numpy as np
from pyproj import Transformer
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
from osgeo import gdal

# Functions

In [None]:
def get_s2_sr_cld_col(aoi, start_date, end_date, cloud_filter):
    """Build a Sentinel-2 SR collection with s2cloudless images attached.

    Args:
        aoi: geometry passed to ``filterBounds`` and used to clip each image.
        start_date: start of the window passed to ``filterDate``.
        end_date: end of the window passed to ``filterDate``.
        cloud_filter: upper bound (inclusive) on ``CLOUDY_PIXEL_PERCENTAGE``.

    Returns:
        ee.ImageCollection of clipped SR images; each one carries its matching
        cloud-probability image under the ``s2cloudless`` property.
    """
    # Surface-reflectance scenes inside the AOI/date window, pre-filtered by
    # the scene-level cloud percentage.
    max_cloud = ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', cloud_filter)
    surface_refl = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    surface_refl = surface_refl.filterBounds(aoi).filterDate(start_date, end_date).filter(max_cloud)

    # Cloud-probability scenes for the same AOI/date window.
    cloud_prob = ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')
    cloud_prob = cloud_prob.filterBounds(aoi).filterDate(start_date, end_date)

    # Pair each SR scene with the probability scene sharing its 'system:index'.
    same_index = ee.Filter.equals(leftField='system:index', rightField='system:index')
    joined = ee.Join.saveFirst('s2cloudless').apply(
        primary=surface_refl,
        secondary=cloud_prob,
        condition=same_index,
    )

    def _clip_to_aoi(img):
        return img.clip(aoi)

    return ee.ImageCollection(joined).map(_clip_to_aoi)

def add_cloud_bands(img):
    """Append the s2cloudless probability band and a binary ``clouds`` band.

    Reads the module-level ``CLD_PRB_THRESH``: pixels whose probability is
    greater than it are flagged in ``clouds``.
    """
    # The joined cloud-probability image is stored as a property on the scene.
    probability = ee.Image(img.get('s2cloudless')).select('probability')
    cloud_flag = probability.gt(CLD_PRB_THRESH).rename('clouds')

    extra_bands = ee.Image([probability, cloud_flag])
    return img.addBands(extra_bands)

def add_shadow_bands(img):
    """Append ``dark_pixels``, ``cloud_transform`` and ``shadows`` bands.

    Expects ``img`` to already contain a ``clouds`` band. Reads the
    module-level ``NIR_DRK_THRESH`` and ``CLD_PRJ_DIST``.
    """
    reflectance_scale = 1e4

    # Dark NIR pixels that are not SCL class 6 (water) are shadow candidates.
    non_water = img.select('SCL').neq(6)
    nir_is_dark = img.select('B8').lt(NIR_DRK_THRESH * reflectance_scale)
    dark_pixels = nir_is_dark.multiply(non_water).rename('dark_pixels')

    # Direction in which clouds cast their shadow (assumes UTM projection).
    solar_azimuth = ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    shadow_azimuth = ee.Number(90).subtract(solar_azimuth)

    # Smear the cloud band along that direction for CLD_PRJ_DIST * 10 pixels.
    transform = img.select('clouds').directionalDistanceTransform(shadow_azimuth, CLD_PRJ_DIST * 10)
    transform = transform.reproject(crs=img.select(0).projection(), scale=100)
    cld_proj = transform.select('distance').mask().rename('cloud_transform')

    # A shadow is a dark pixel that falls inside the projected cloud footprint.
    shadows = cld_proj.multiply(dark_pixels).rename('shadows')

    return img.addBands(ee.Image([dark_pixels, cld_proj, shadows]))

def add_cld_shdw_mask(img):
    """Append a combined cloud/shadow ``cloudmask`` band (1 = cloud or shadow).

    Reads the module-level ``BUFFER`` to size the dilation.
    """
    # Derive the cloud bands first; the shadow step needs the 'clouds' band.
    with_components = add_shadow_bands(add_cloud_bands(img))

    cloud_band = with_components.select('clouds')
    shadow_band = with_components.select('shadows')
    flagged = cloud_band.add(shadow_band).gt(0)

    # Erode small patches, then dilate what remains by BUFFER.
    # Working at 20 m scale is for speed; clouds don't need 10 m precision.
    cleaned = flagged.focalMin(2).focalMax(BUFFER * 2 / 20)
    cloudmask = cleaned.reproject(crs=img.select([0]).projection(), scale=20).rename('cloudmask')

    # Only the final mask is attached to the input image.
    return img.addBands(cloudmask)

def apply_cld_shdw_mask(img):
    """Return the ``B*`` bands of ``img`` with cloud/shadow pixels masked out."""
    # Invert the mask so clear pixels are 1 and cloud/shadow pixels are 0.
    clear_sky = img.select('cloudmask').Not()
    reflectance = img.select('B.*')
    return reflectance.updateMask(clear_sky)

def export_image_to_drive(image, description, aoi):
    """Start a Google Drive export of the B2, B3, B4 and B8 bands of an image.

    Args:
        image: ee.Image to export.
        description: str, unique description for the export task.
        aoi: ee.Geometry, region covered by the export.
    """
    export_bands = ['B2', 'B3', 'B4', 'B8']
    export_kwargs = {
        'image': image.select(export_bands),
        'description': description,
        'region': aoi,
        'fileFormat': 'GeoTIFF',
        'scale': 10,  # export pixel size
    }

    # The task runs asynchronously on the Earth Engine side once started.
    ee.batch.Export.image.toDrive(**export_kwargs).start()
    print(f'Exporting {description} to Drive...')

In [None]:
def hedley_glint_correction(image, SWIR_band='B11', bands=None):
    """Subtract a SWIR-scaled glint estimate from the selected bands.

    Args:
        image: ee.Image containing ``SWIR_band`` and every band in ``bands``.
        SWIR_band: name of the short-wave infrared reference band.
        bands: band names to correct; defaults to ``['B2', 'B3', 'B4', 'B8']``.

    Returns:
        The input image with the corrected bands overwriting the originals and
        the property ``glint_corrected`` set to True.
    """
    # Use a None sentinel so the default list is not shared between calls.
    if bands is None:
        bands = ['B2', 'B3', 'B4', 'B8']

    swir = image.select(SWIR_band)
    targets = image.select(bands)

    # Per-pixel mean, across the selected bands, of the band / SWIR ratio.
    # NOTE(review): band - swir * mean(band / swir) reduces algebraically to
    # band - mean(bands) at every pixel, so SWIR cancels out. Hedley et al.
    # (2005) derive the slope from a regression over a deep-water sample;
    # confirm this per-pixel form is what is intended.
    coefficients = targets.divide(swir).reduce(ee.Reducer.mean())

    corrected = targets.subtract(swir.multiply(coefficients))
    return image.addBands(corrected, overwrite=True).set('glint_corrected', True)

def s2_10m_target_indices(image):
    """Append ``NDVI`` (B8 vs B4) and ``NDWI`` (B3 vs B8) bands to ``image``."""
    band_pairs = {'NDVI': ['B8', 'B4'], 'NDWI': ['B3', 'B8']}
    index_bands = [
        image.normalizedDifference(pair).rename(label)
        for label, pair in band_pairs.items()
    ]
    return image.addBands(index_bands)

def otsu_gee(histogram):
    """Pick an Otsu threshold from an Earth Engine histogram (server-side).

    Args:
        histogram: dictionary-like object with ``histogram`` (bucket counts)
            and ``bucketMeans`` entries, e.g. the per-band output of a
            ``reduceRegion`` with ``ee.Reducer.histogram()``.

    Returns:
        The bucket mean whose split maximises the between-class sum of squares.
    """
    counts = ee.Array(ee.Dictionary(histogram).get('histogram'))
    means = ee.Array(ee.Dictionary(histogram).get('bucketMeans'))  # bucket centres are stored under 'bucketMeans'
    size = means.length().get([0])  # number of buckets
    total = counts.reduce(ee.Reducer.sum(), [0]).get([0])  # total pixel count
    sum_ = means.multiply(counts).reduce(ee.Reducer.sum(), [0]).get([0])  # count-weighted sum of bucket means
    mean = sum_.divide(total)  # overall mean

    # Candidate split positions: class A is the first i buckets, class B the rest.
    indices = ee.List.sequence(1, size)

    # Compute between sum of squares (BSS)
    def compute_bss(i):
        i = ee.Number(i)
        # Class A: count and weighted mean of buckets [0, i).
        a_counts = counts.slice(0, 0, i)
        a_count = a_counts.reduce(ee.Reducer.sum(), [0]).get([0])
        a_means = means.slice(0, 0, i)
        a_mean = a_means.multiply(a_counts).reduce(ee.Reducer.sum(), [0]).get([0]).divide(a_count)

        # Class B: whatever remains after removing class A from the totals.
        # NOTE(review): at i == size, b_count is 0 and b_mean divides by zero;
        # verify how Earth Engine orders that entry in the sort below.
        b_count = total.subtract(a_count)
        b_mean = sum_.subtract(a_count.multiply(a_mean)).divide(b_count)

        return a_count.multiply(a_mean.subtract(mean).pow(2)).add(
            b_count.multiply(b_mean.subtract(mean).pow(2))
        )

    bss = indices.map(compute_bss)

    # Return the mean value corresponding to the maximum BSS
    return means.sort(bss).get([-1])

def extract_valid_bounds_nc_to_epsg4326(nc_file, band_name):
    """Return the lon/lat bounding box of valid pixels in a NetCDF band.

    A pixel is valid when it is neither masked (NetCDF fill value) nor NaN.
    The ``lat`` and ``lon`` variables are assumed to be 1-D vectors already
    expressed in EPSG:4326, so no reprojection is performed.

    Args:
        nc_file: path to the NetCDF file.
        band_name: name of the variable whose valid extent is wanted.

    Returns:
        ee.Geometry.BBox(west, south, east, north) built from the coordinates
        of the outermost valid rows and columns.

    Raises:
        ValueError: if the band is missing, is not 2-D after dropping
            length-1 axes, or contains no valid pixels.
    """
    # The context manager closes the file even when validation raises.
    with nc.Dataset(nc_file, mode='r') as dataset:
        if band_name not in dataset.variables:
            raise ValueError(f"Band '{band_name}' not found in the NetCDF file.")

        lat = np.asarray(dataset.variables['lat'][:], dtype=float)
        lon = np.asarray(dataset.variables['lon'][:], dtype=float)
        data = dataset.variables[band_name][:]

    # netCDF4 returns masked arrays when a fill value is defined; np.where on
    # a masked array ignores the mask, so fold the mask into the validity test.
    valid_mask = ~(np.ma.getmaskarray(data) | np.isnan(np.ma.getdata(data)))

    # Tolerate a leading singleton axis such as (time=1, lat, lon).
    if valid_mask.ndim > 2:
        valid_mask = np.squeeze(valid_mask)
    if valid_mask.ndim != 2:
        raise ValueError(
            f"Band '{band_name}' must be 2-D (lat, lon); got shape {np.shape(data)}."
        )

    valid_rows = np.flatnonzero(valid_mask.any(axis=1))
    valid_cols = np.flatnonzero(valid_mask.any(axis=0))

    if valid_rows.size == 0 or valid_cols.size == 0:
        raise ValueError("No valid data in the NetCDF file.")

    # Coordinate vectors may run in either direction (north-up rasters store
    # latitude descending), so order the end points explicitly.
    lat_ends = (float(lat[valid_rows[0]]), float(lat[valid_rows[-1]]))
    lon_ends = (float(lon[valid_cols[0]]), float(lon[valid_cols[-1]]))
    min_lat, max_lat = min(lat_ends), max(lat_ends)
    min_lon, max_lon = min(lon_ends), max(lon_ends)

    # Plain Python floats keep the geometry serialisable by the EE client.
    return ee.Geometry.BBox(min_lon, min_lat, max_lon, max_lat)

def get_date(image):
    """Wrap the image date, formatted ``YYYY-MM-dd``, in a geometry-less feature."""
    date_label = image.date().format('YYYY-MM-dd')
    properties = {'date': date_label}
    return ee.Feature(None, properties)

def mosaic_images_for_date(date, sentinel2_col):
    """Mosaic every image of ``sentinel2_col`` acquired on ``date``.

    Returns:
        ee.Dictionary with ``date`` and ``S2`` entries; ``S2`` is the mosaic
        when at least one image falls within the one-day window starting at
        ``date``, otherwise None.
    """
    next_day = ee.Date(date).advance(1, 'day')
    same_day = sentinel2_col.filter(ee.Filter.date(date, next_day))

    # Only mosaic when the day actually has imagery.
    has_images = same_day.size().gt(0)
    s2_mosaic = ee.Algorithms.If(has_images, same_day.mosaic(), None)

    return ee.Dictionary({'date': date, 'S2': s2_mosaic})

def add_ind_to_map(image, map_object, band, date):
    """Add a spectral-index layer (NDWI or NDVI) to ``map_object``.

    The layer is named ``{date}_{band}`` and stretched from -1 to 1 with the
    matching geemap palette. Any other ``band`` value adds nothing.
    """
    palette_names = {'NDWI': 'ndwi', 'NDVI': 'ndvi'}
    if band not in palette_names:
        return

    vis_params = {
        'min': -1,
        'max': 1,
        'bands': band,
        'palette': getattr(cm.palettes, palette_names[band]),
    }
    map_object.addLayer(image, vis_params, f'{date}_{band}')

def filter_images_by_valid_pixels(mosaic_dict, subset_aoi, percentage, im_scale):
    """Keep the mosaics whose unmasked share of ``subset_aoi`` exceeds a threshold.

    Args:
        mosaic_dict: mapping of date -> ee.Dictionary holding the mosaic under
            the ``S2`` key.
        subset_aoi: ee.Geometry over which validity is measured.
        percentage: minimum valid fraction (0-1) an image must exceed.
        im_scale: scale, in metres, passed to ``reduceRegion``.

    Returns:
        dict of date -> ee.Image for the mosaics that pass.

    Note:
        One ``getInfo`` round trip is made per date.
    """
    good_ims = {}

    for date, mosaic in mosaic_dict.items():
        image = ee.Image(mosaic.get('S2'))

        # The mean of the mask over the region is the valid fraction directly.
        # Dividing a pixel sum by area / scale**2 is only right when the
        # reduction grid has square, metric pixels, which is not guaranteed
        # for mosaics, so that estimate can bias the fraction (even above 1).
        valid_fraction = image.mask().reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=subset_aoi,
            scale=im_scale,
            maxPixels=1e13,
        ).values().get(0)

        if ee.Number(valid_fraction).getInfo() > percentage:
            good_ims[date] = image

    return good_ims

def visualize_nc_file(nc_file):
    """Plot the Gamma0_VV and Gamma0_VH bands of a NetCDF file side by side.

    Both panels share one grey-scale stretch so they can be compared directly.

    Args:
        nc_file: path to a NetCDF file with ``lat``, ``lon``, ``Gamma0_VV``
            and ``Gamma0_VH`` variables.
    """
    # The context manager closes the file even if a variable is missing.
    with nc.Dataset(nc_file, mode='r') as dataset:
        lat = dataset.variables['lat'][:]
        lon = dataset.variables['lon'][:]
        # Mask NaN/inf so they cannot poison the colour limits below.
        gamma0_vv = np.ma.masked_invalid(dataset.variables['Gamma0_VV'][:])
        gamma0_vh = np.ma.masked_invalid(dataset.variables['Gamma0_VH'][:])

    # Shared min/max for consistent colour scaling across both panels.
    vmin = min(gamma0_vv.min(), gamma0_vh.min())
    vmax = max(gamma0_vv.max(), gamma0_vh.max())

    extent = [lon.min(), lon.max(), lat.min(), lat.max()]
    # imshow draws row 0 at the top by default; when latitude ascends, row 0
    # is the southern edge and must be drawn at the bottom instead.
    origin = 'lower' if lat[0] < lat[-1] else 'upper'

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    panels = (('Gamma0_VV', gamma0_vv), ('Gamma0_VH', gamma0_vh))
    for ax, (label, band) in zip(axes, panels):
        im = ax.imshow(band, extent=extent, origin=origin, cmap='gray',
                       interpolation='nearest', vmin=vmin, vmax=vmax)
        fig.colorbar(im, ax=ax, label=label)
        ax.set_title(label)
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')

    fig.tight_layout()
    plt.show()

def check_spatial_resolution(nc_file):
    """Print the grid spacing of a NetCDF file in degrees and approximate metres.

    The spacing is the mean absolute difference between consecutive ``lat``
    (and ``lon``) values, so it does not hinge on the first two samples.

    Args:
        nc_file: path to a NetCDF file with 1-D ``lat`` and ``lon`` variables.

    Raises:
        ValueError: if either coordinate vector has fewer than two values.
    """
    # The context manager closes the file even if a variable is missing.
    with nc.Dataset(nc_file, mode='r') as dataset:
        lat = np.asarray(dataset.variables['lat'][:], dtype=float)
        lon = np.asarray(dataset.variables['lon'][:], dtype=float)

    if lat.size < 2 or lon.size < 2:
        raise ValueError("Need at least two lat and lon values to estimate the resolution.")

    # Mean spacing along each axis.
    lat_res = np.mean(np.abs(np.diff(lat)))
    lon_res = np.mean(np.abs(np.diff(lon)))

    print(f"Latitude resolution: {lat_res:.6f} degrees")
    print(f"Longitude resolution: {lon_res:.6f} degrees")

    # Approximate resolution in metres (1 degree ~ 111 km); a degree of
    # longitude shrinks with the cosine of the latitude.
    lat_res_m = lat_res * 111000
    lon_res_m = lon_res * 111000 * np.cos(np.deg2rad(lat.mean()))

    print(f"Approximate pixel size: {lat_res_m:.2f} meters (latitude), {lon_res_m:.2f} meters (longitude)")

In [None]:
# Initialise the Earth Engine client for this session.
# NOTE(review): `project` is an empty string here, presumably a placeholder
# for a Cloud project ID; fill it in before running.
ee.Initialize(project='')

# 1. Read in the custom Gamma0 .nc files

In [None]:
# Project name and host machine; together they select where the Sentinel-1
# NetCDF files (grd_dir) and their composites (comp_dir) live.
PROJECT = 'SabineRS'
SYSTEM = 'mac'          # or 'linux'


if SYSTEM == 'linux':
    grd_dir = f'/home/clay/Documents/{PROJECT}/Sentinel-1/GRD/ASCENDING/136/93/10_netcdfs'
    comp_dir = f'/home/clay/Documents/{PROJECT}/Sentinel-1/GRD/ASCENDING/136/93/11_composites'
else:
    grd_dir = f'/Volumes/SamsungExt/{PROJECT}/Sentinel-1/GRD/ASCENDING/136/93/10_netcdfs'
    comp_dir = f'/Volumes/SamsungExt/{PROJECT}/Sentinel-1/GRD/ASCENDING/136/93/11_composites'

# Report missing folders up front (e.g. an external drive that is not mounted)
# instead of failing later inside os.listdir.
for path in (grd_dir, comp_dir):
    if not os.path.exists(path):
        print(f'Warning: directory not found: {path}')

In [None]:
# No fallback AOI is defined for a missing NetCDF folder, so stop here with a
# clear message rather than failing later when the folder is listed.
# TODO: supply an alternative `aoi` here if running without the local files.
if not os.path.exists(grd_dir):
    raise FileNotFoundError(f'Sentinel-1 NetCDF directory not found: {grd_dir}')

In [None]:
# Collect the Sentinel-1 NetCDF inputs: file-name stems (used as dates), the
# full GRD paths, and the full composite paths, each sorted by name.
# TODO: the NetCDF export still needs testing.
grd_dates = sorted(name[:-3] for name in os.listdir(grd_dir) if name.endswith('.nc'))
grd_files = sorted(os.path.join(grd_dir, name) for name in os.listdir(grd_dir) if name.endswith('.nc'))
comp_files = sorted(os.path.join(comp_dir, name) for name in os.listdir(comp_dir) if name.endswith('.nc'))

# visualize_nc_file(grd_files[0])
# check_spatial_resolution(grd_files[0])

In [None]:
# Area of interest: bounding box of the valid Gamma0_VV pixels in the first
# (alphabetically sorted) GRD NetCDF file.
aoi = extract_valid_bounds_nc_to_epsg4326(grd_files[0], 'Gamma0_VV')

# Draw a more refined aoi if you wish.
- Will use this to filter out the cloud-masked S2 NDVI and NDWI images to only contain images with your desired percentage of remaining pixels within the aoi subset

In [None]:
# Interactive map on which to draw a polygon marking a refined AOI.
# The drawn shape is read back from `m.draw_features` in the next cell.

## Create the map, then recentre it on the NetCDF-derived aoi
m = geemap.Map(center=[20, 0], zoom=2, basemap='HYBRID')
m.centerObject(aoi, 8)

# Show the aoi bounding box as a reference for drawing
m.addLayer(aoi)
## Add drawing tools
m.add_draw_control()
## Display the map (`display` is provided by the notebook environment)
display(m)

In [None]:
## Get the drawn features; fail with a clear message when nothing was drawn
## instead of an IndexError on the empty list.
drawn = m.draw_features
if not drawn:
    raise ValueError('No drawn features found; draw a polygon on the map above before running this cell.')
draw_features = drawn[0]
## Build an ee.Polygon from the outer ring of the first drawn shape; this is
## the refined area of interest used to screen imagery.
subset_aoi = ee.Geometry.Polygon(draw_features.getInfo()['geometry']['coordinates'][0])

# Retrieve Sentinel imagery from GEE

In [None]:
# Sentinel-2 query window and cloud/shadow masking parameters.
s2_cloud_cov = 20           # max scene-level CLOUDY_PIXEL_PERCENTAGE
START_DATE = '2019-10-01'
END_DATE = '2024-10-01'
CLD_PRB_THRESH = 35         # probability above which a pixel is flagged as cloud
NIR_DRK_THRESH = 0.15       # scaled by 1e4 and compared with B8 to find dark pixels
CLD_PRJ_DIST = 2            # multiplied by 10 for the shadow projection distance
BUFFER = 100                # sets the focalMax dilation of the cloud/shadow mask
ORBIT = 'ASCENDING'         # not referenced by the functions in this notebook

s2_coll = get_s2_sr_cld_col(aoi, START_DATE, END_DATE, s2_cloud_cov)

# Cloud-masked Sentinel-2 10-metre NDVI and NDWI bands, built step by step:
# glint correction -> cloud/shadow mask -> apply mask -> spectral indices.
s2_cm = s2_coll
for step in (hedley_glint_correction, add_cld_shdw_mask, apply_cld_shdw_mask, s2_10m_target_indices):
    s2_cm = s2_cm.map(step)
s2_cm = s2_cm.select(['NDVI', 'NDWI'])

# Mosaic images with matching dates
- Common cells for GEE Sentinel imagery typically smaller than that of ASF/Copernicus
- Could result in multiple images for a single day
- Iterate through the information for each of the images in the S1 and S2 collections and mosaic the images with matching dates.

In [None]:
# Pull every acquisition date to the client, then keep one sorted copy of each.
date_features = s2_cm.map(get_date)
s2_date_list = date_features.aggregate_array('date').getInfo()
unique_list = sorted(set(s2_date_list))

In [None]:
# One mosaic entry per unique acquisition date, keyed by the date string.
mosaic_dict = {
    date: mosaic_images_for_date(date, s2_cm)
    for date in unique_list
}

# Filter mosaics by percentage of pixels remaining within the subset aoi
- Either simplify the storage of the images within the mosaic_dict, or improve the function below to accommodate the storage?
- Need to be able to determine the number of remaining pixels within the subset_aoi. If the percentage of pixels within the subset_aoi is above clear_threshold, the image will remain in the collection. If not, it will be removed.

In [None]:
# Keep only the mosaics with more than 50% valid pixels inside subset_aoi,
# measured at a 10 m scale.
filtered_dict = filter_images_by_valid_pixels(mosaic_dict, subset_aoi, 0.50, 10)

# Temporally register Sentinel-1 .nc files with Sentinel-2 images
- reduces amount of processing needed if Sentinel-2 images don't have a close match
- Different relook frequencies lead to epochs on different dates. Need to match up the dates as close as possible

In [None]:
# Display the retained date -> image mapping.
filtered_dict

In [None]:
# Sorted dates of the Sentinel-2 mosaics that passed the valid-pixel filter.
gee_dates = sorted(filtered_dict)

In [None]:
# Display the retained Sentinel-2 dates.
gee_dates

In [None]:
# grd_dates come from the NetCDF file names and are 'YYYYMMDD'; gee_dates are
# 'YYYY-MM-DD'. Normalise the GRD dates first so both lists sort and compare
# on the same format.
# NOTE(review): assumes every file stem is exactly an 8-digit date; strptime
# raises ValueError otherwise.
# TODO: check for matches and build a dictionary pairing each S1 .nc file with
# the corresponding S2 ee.Image.
grd_dates_iso = [datetime.strptime(stem, '%Y%m%d').strftime('%Y-%m-%d') for stem in grd_dates]

sorted(set(gee_dates) | set(grd_dates_iso))

# Use this to visualize the Mosaics

In [None]:
# Initialize a map centred on the aoi
Map = geemap.Map()
Map.centerObject(aoi, 12)

filtered_dates = sorted(filtered_dict)
# Preview a few sample positions; skip any that lie beyond the number of
# retained mosaics instead of raising an IndexError.
for i in [0, 10, 50, 100]:
    if i >= len(filtered_dates):
        continue
    image = filtered_dict[filtered_dates[i]]
    add_ind_to_map(image, Map, 'NDVI', filtered_dates[i])

# Display the map
Map.addLayerControl()  # Optional: Add a layer control panel
Map

# Otsu threshold
- Need to upsample the GRD images to ~20-meter resolution to match with interferograms
- Otsu on all 20-meter SAR bands
- Otsu on NDVI and NDWI 10-meter bands
- Majority voting with SAR, refinement with NDVI and NDWI
- Final should give a water mask for each registered pair

In [None]:
# Otsu-thresholded binary masks, keyed by date, for each spectral index.
ndvi_ims = {}
ndwi_ims = {}
masks_by_band = {'NDVI': ndvi_ims, 'NDWI': ndwi_ims}

for band, band_masks in masks_by_band.items():
    for date in filtered_dates:
        # Look the image up by the loop's own date, not a stale index.
        image = filtered_dict[date]
        histogram = image.select(band).reduceRegion(
            reducer=ee.Reducer.histogram(),
            geometry=aoi,
            scale=10,  # Adjust based on your dataset resolution
            maxPixels=1e15
        ).get(band)

        # One client round trip per image to fetch the threshold value.
        threshold = otsu_gee(histogram).getInfo()

        # Create the binary mask and store it inside the date loop so every
        # date is kept, not just the last one.
        band_masks[date] = image.select(band).gt(threshold)

In [None]:
# Preview Otsu masks for a few dates. The map is not called `map`, so the
# Python builtin stays usable in later cells.
otsu_map = geemap.Map()
otsu_map.centerObject(aoi, 8)

target_band = 'NDVI'

for i in [0, 50, 100]:
    # Skip sample positions beyond the number of retained mosaics.
    if i >= len(filtered_dates):
        continue
    image = filtered_dict[filtered_dates[i]]
    histogram = image.select(target_band).reduceRegion(
        reducer=ee.Reducer.histogram(),
        geometry=aoi,
        scale=10,  # Adjust based on your dataset resolution
        maxPixels=1e15
    ).get(target_band)

    result = otsu_gee(histogram)
    threshold = result.getInfo()

    # Create binary mask (the band keeps the name of target_band)
    binary_mask = image.select(target_band).gt(threshold)
    add_ind_to_map(binary_mask, otsu_map, target_band, filtered_dates[i])

otsu_map.addLayerControl(position='topleft')
otsu_map