In [76]:
import numpy as np
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import os
from osgeo import gdal, osr
from pyproj import CRS
from shapely.geometry import box
from shapely.geometry import mapping
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.ticker as mticker
from shapely.geometry import box
from matplotlib.patches import Polygon
from datetime import datetime
import cv2

# Functions

In [181]:
def get_labels(labelpath):
    """Collect the GeoTIFF label rasters written by each labelling method.

    Parameters
    ----------
    labelpath : str
        Directory that contains the ``manual``, ``otsu``, ``kmeans`` and
        ``gmm`` sub-directories.

    Returns
    -------
    tuple of list of str
        ``(man_ims, otsu_ims, kmeans_ims, gmm_ims)``. Each list holds the
        ``.tif`` paths found in the matching sub-directory, sorted by file
        name.
    """
    def _list_tifs(method):
        # os.listdir() returns entries in arbitrary order. Sorting makes the
        # result deterministic, so the four lists can be paired by position
        # as long as the file names inside each folder sort the same way.
        names = sorted(os.listdir(os.path.join(labelpath, method)))
        return [os.path.join(labelpath, f'{method}/{name}') for name in names if name.endswith('.tif')]

    man_ims, otsu_ims, kmeans_ims, gmm_ims = (_list_tifs(method) for method in ('manual', 'otsu', 'kmeans', 'gmm'))

    return man_ims, otsu_ims, kmeans_ims, gmm_ims

def get_grd(grdpath):
    """List the ``.tif`` files stored directly inside ``grdpath``.

    Parameters
    ----------
    grdpath : str
        Directory to scan (not recursive).

    Returns
    -------
    list of str
        Full paths of the ``.tif`` files, sorted by file name.
    """
    # os.listdir() gives no ordering guarantee; sort so the result is
    # deterministic and can be paired by position with other sorted listings.
    orig_ims = [os.path.join(grdpath, file) for file in sorted(os.listdir(grdpath)) if file.endswith('.tif')]

    return orig_ims

def get_glcm(glcmpath):
    """List the ``.tif`` files stored directly inside ``glcmpath``.

    Parameters
    ----------
    glcmpath : str
        Directory to scan (not recursive).

    Returns
    -------
    list of str
        Full paths of the ``.tif`` files, sorted by file name.
    """
    # os.listdir() gives no ordering guarantee; sort so the result is
    # deterministic and can be paired by position with other sorted listings.
    orig_glcms = [os.path.join(glcmpath, file) for file in sorted(os.listdir(glcmpath)) if file.endswith('.tif')]

    return orig_glcms

# Clip each Sentinel-1 image
def clip_sentinel1_image(s1_path, output_path, s2_bounds):
    """Clip a raster to a bounding box and write the result to a new file.

    Parameters
    ----------
    s1_path : str
        Path of the raster to clip (opened with rasterio).
    output_path : str
        Path of the clipped raster to create.
    s2_bounds : iterable of float
        Bounds unpacked positionally into ``rasterio.windows.from_bounds``
        (left, bottom, right, top), in the coordinate system of the raster.
    """
    with rasterio.open(s1_path) as src:
        # Window of the source raster that corresponds to the bounding box
        window = rasterio.windows.from_bounds(*s2_bounds, transform=src.transform)

        # Read all bands inside the window: array of shape (bands, rows, cols)
        clipped_image = src.read(window=window)

        # A window derived from map coordinates can have a fractional
        # height/width, so take the raster size from the array that was
        # actually read. This keeps the metadata and the written data in
        # agreement.
        out_meta = src.meta.copy()
        out_meta.update({
            'height': clipped_image.shape[1],
            'width': clipped_image.shape[2],
            'transform': src.window_transform(window)
        })

        # Write the clipped image to a new file
        with rasterio.open(output_path, 'w', **out_meta) as dst:
            dst.write(clipped_image)

def export_s1images(coll, s2_bounds, type):
    """Clip every Sentinel-1 image in ``coll`` to ``s2_bounds``.

    Parameters
    ----------
    coll : iterable of str
        Paths of the images to clip.
    s2_bounds : iterable of float
        Bounding box forwarded to ``clip_sentinel1_image``.
    type : str
        'original' or 'gammamap'; used as the name of the output sub-folder.
    """

    # Iterate over the paths themselves. The original looped over
    # enumerate(coll), which yields (index, path) tuples and therefore broke
    # the string slicing below.
    for s1_path in coll:
        # Fixed-width convention kept from the original code: the first 16
        # characters are used as the root folder and the last 17 as the file
        # name.
        output_path = os.path.join(s1_path[:16], f'Clipped/{type}/{s1_path[-17:]}')
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        clip_sentinel1_image(s1_path, output_path, s2_bounds)

def plot_vv_vh_with_bbox(image_path, bbox):
    """Show bands 1 (VV) and 2 (VH) of a raster with a bounding box overlaid.

    Parameters
    ----------
    image_path : str
        Raster to plot; band 1 is treated as VV and band 2 as VH.
    bbox : iterable of float
        ``[min_x, min_y, max_x, max_y]`` in the coordinate system of the
        raster; drawn in red on both panels.
    """
    with rasterio.open(image_path) as src:
        vv = src.read(1)  # VV is in the first band
        vh = src.read(2)  # VH is in the second band
        bounds = src.bounds

    # imshow's extent describes the outer edges of the image, so use the
    # dataset bounds. Using the centres of the corner pixels would shrink the
    # image by half a pixel on every side relative to the overlay.
    # Assumes a north-up raster, as the original code did.
    extent = [bounds.left, bounds.right, bounds.bottom, bounds.top]

    # Outline of the bounding box as plottable x/y coordinate sequences
    x, y = box(*bbox).exterior.xy

    fig, ax = plt.subplots(1, 2, figsize=(14, 6))  # one panel per band

    for axis, band, band_name in zip(ax, (vv, vh), ('VV', 'VH')):
        axis.imshow(band, cmap='gray', extent=extent)
        axis.plot(x, y, color='red', linewidth=2, label="Sentinel-2 Coverage")
        axis.set_title(f'{band_name} Band with Bounding Box')
        axis.set_xlabel('Easting (meters)')
        axis.set_ylabel('Northing (meters)')
        # Limit the tick count so the long coordinate labels stay readable
        axis.xaxis.set_major_locator(mticker.MaxNLocator(5))
        axis.yaxis.set_major_locator(mticker.MaxNLocator(5))
        axis.legend(loc='lower right')

    # Show the plot with layout adjustments
    plt.tight_layout()
    plt.show()

def get_EPSG(im):
    """Print and return the authority code of a raster's projection.

    Parameters
    ----------
    im : str
        Path of a raster that GDAL can open.

    Returns
    -------
    The value of the second ``AUTHORITY`` entry of the projection (the
    numeric code, e.g. of an EPSG definition), as returned by
    ``SpatialReference.GetAttrValue``.
    """
    projection_wkt = gdal.Open(im).GetProjection()

    # Parse the WKT so the authority entry can be queried
    srs = osr.SpatialReference()
    srs.ImportFromWkt(projection_wkt)

    epsg_code = srs.GetAttrValue('AUTHORITY', 1)
    print(epsg_code)
    return epsg_code

def reproject_raster(input_raster, output_raster, target_crs='EPSG:32615'):
    """Warp ``input_raster`` into ``target_crs`` and write ``output_raster``.

    Parameters
    ----------
    input_raster : str
        Source raster handed to ``gdal.Warp``.
    output_raster : str
        Destination path handed to ``gdal.Warp``.
    target_crs : str, optional
        Destination spatial reference (``dstSRS``); defaults to 'EPSG:32615'.
    """
    gdal.Warp(output_raster, input_raster, options=gdal.WarpOptions(dstSRS=target_crs))

def clip_raster_by_bbox(input_raster, output_raster, bbox):
    """Cut ``input_raster`` down to ``bbox`` with ``gdal.Translate``.

    Parameters
    ----------
    input_raster : str
        Raster to clip.
    output_raster : str
        Path of the clipped raster to write.
    bbox : iterable of float
        ``(min_x, min_y, max_x, max_y)`` in the raster's coordinate system.
    """
    left, bottom, right, top = bbox

    # projWin is given as upper-left corner followed by lower-right corner,
    # hence the (left, top, right, bottom) ordering.
    proj_window = [left, top, right, bottom]
    translate_options = gdal.TranslateOptions(projWin=proj_window)

    gdal.Translate(output_raster, input_raster, options=translate_options)


def perform_pca(image_path, output_pca_path):
    """Reduce bands 1 and 2 of a raster to one principal component.

    The first principal component of the two bands is computed with OpenCV,
    rescaled to 0-255 (uint8) and written as a single-band GeoTIFF that
    reuses the projection and geotransform of the input.

    Parameters
    ----------
    image_path : str
        Raster whose first two bands are read.
    output_pca_path : str
        Path of the GeoTIFF to create.
    """
    dataset = gdal.Open(image_path)

    # Stack the first two bands into one array of shape (bands, rows, cols)
    bands_array = np.stack(
        [dataset.GetRasterBand(band_index).ReadAsArray() for band_index in (1, 2)],
        axis=0,
    )
    bands_count, n_rows, n_cols = bands_array.shape

    # One row per pixel, one column per band; OpenCV PCA expects float32
    samples = bands_array.reshape(bands_count, -1).T.astype(np.float32)

    # Project every pixel onto the first principal component
    mean, eigenvectors = cv2.PCACompute(samples, mean=None, maxComponents=1)
    projected = cv2.PCAProject(samples, mean, eigenvectors)

    # Back to image layout, then stretch to the 0-255 range as uint8
    pca_image = projected.reshape(n_rows, n_cols)
    pca_image_normalized = cv2.normalize(pca_image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # Single-band Float32 GeoTIFF carrying the georeferencing of the input
    gtiff_driver = gdal.GetDriverByName('GTiff')
    output = gtiff_driver.Create(output_pca_path, dataset.RasterXSize, dataset.RasterYSize, 1, gdal.GDT_Float32)
    output.SetProjection(dataset.GetProjection())
    output.SetGeoTransform(dataset.GetGeoTransform())
    output.GetRasterBand(1).WriteArray(pca_image_normalized)
    output.FlushCache()  # make sure the data reaches the disk
    output = None  # drop the reference so GDAL closes the file

def register_images(reference_image, target_image):
    """Estimate the translation that aligns ``target_image`` to ``reference_image``.

    Uses OpenCV's ECC algorithm with a translation-only motion model.

    Parameters
    ----------
    reference_image, target_image : array-like
        Images handed to ``cv2.findTransformECC`` as template and input.

    Returns
    -------
    numpy.ndarray or None
        The estimated 2x3 warp matrix, or ``None`` if OpenCV raised an error
        (the error is printed).
    """
    # Start from the identity 2x3 matrix
    initial_warp = np.eye(2, 3, dtype=np.float32)

    # Stop after 5000 iterations or once the update falls below 1e-6
    stop_criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5000, 1e-6)

    try:
        _, estimated_warp = cv2.findTransformECC(
            reference_image, target_image, initial_warp, cv2.MOTION_TRANSLATION, stop_criteria
        )
    except cv2.error as e:
        print(f"Error in ECC: {e}")
        return None

    return estimated_warp

def apply_transformation_to_all_bands(target_bands, warp_matrix, image_shape, output_dtype=np.float32):
    """Warp every band with the same affine matrix.

    Parameters
    ----------
    target_bands : iterable of numpy.ndarray
        Bands to transform.
    warp_matrix : numpy.ndarray
        2x3 matrix passed to ``cv2.warpAffine`` (applied with
        ``WARP_INVERSE_MAP``).
    image_shape : sequence
        ``(rows, cols)`` of the output; swapped to ``(cols, rows)`` for OpenCV.
    output_dtype : dtype, optional
        Type each warped band is cast to; defaults to ``np.float32``.

    Returns
    -------
    list of numpy.ndarray
        The warped bands, in input order.
    """
    def _warp(band):
        # Linear interpolation; the matrix is treated as the inverse mapping
        warped = cv2.warpAffine(
            band.astype(np.float32),
            warp_matrix,
            (image_shape[1], image_shape[0]),
            flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP,
        )
        return warped.astype(output_dtype)

    return [_warp(band) for band in target_bands]

def save_multiband_image_as_tiff(output_path, transformed_bands, reference_dataset, gdal_dtype=gdal.GDT_Float32):
    """Write a list of bands to a GeoTIFF georeferenced like ``reference_dataset``.

    Parameters
    ----------
    output_path : str
        GeoTIFF to create.
    transformed_bands : sequence of numpy.ndarray
        One array per output band, written in order starting at band 1.
    reference_dataset : gdal.Dataset
        Supplies raster size, projection and geotransform.
    gdal_dtype : int, optional
        GDAL data type of the output; defaults to ``gdal.GDT_Float32``.
    """
    gtiff = gdal.GetDriverByName('GTiff')
    out_dataset = gtiff.Create(
        output_path,
        reference_dataset.RasterXSize,
        reference_dataset.RasterYSize,
        len(transformed_bands),
        gdal_dtype,
    )

    # Copy the georeferencing from the reference dataset
    out_dataset.SetProjection(reference_dataset.GetProjection())
    out_dataset.SetGeoTransform(reference_dataset.GetGeoTransform())

    # GDAL band numbers start at 1
    for band_number, band_data in enumerate(transformed_bands, start=1):
        out_dataset.GetRasterBand(band_number).WriteArray(band_data)

    out_dataset.FlushCache()
    out_dataset = None  # drop the reference so GDAL closes the file

# 1. Clip the Sentinel-1 bands to the same extent as the Sentinel-2 imagery
- Clip using the bounding box of the first Sentinel-2 label image.
- Stack all GRD and GLCM bands into a single .tif or .vrt for each date.
- Stack/include the four label raster bands as well.

In [239]:
# set the directory for where your images are located
# (WSL roots used previously: /mnt/d/SabineMSI/s2classifications,
#  /mnt/d/SabineGRD/GEE and /mnt/d/SabineGRD/GLCM)
LABEL_DIR = '/home/wcc/Desktop/SabineRS/MSI/s2classifications'
COMBINED_LABEL_DIR = os.path.join(LABEL_DIR, 'combined')

# os.listdir() order is arbitrary, so sort every listing before the lists are
# paired by position below and in later cells.
man_ims, otsu_ims, kmeans_ims, gmm_ims = (sorted(paths) for paths in get_labels(LABEL_DIR))
orig_ims = sorted(get_grd('/home/wcc/Desktop/SabineRS/GRD/0_initial/backscatter'))
orig_glcms = sorted(get_glcm('/home/wcc/Desktop/SabineRS/GRD/0_initial/glcm'))

assert len(man_ims) == len(otsu_ims) == len(kmeans_ims) == len(gmm_ims), \
    'each labelling method must provide the same number of rasters'

# combine the water-land labels into a single .vrt raster per date
# (one VRT band per labelling method: manual, otsu, kmeans, gmm)
os.makedirs(COMBINED_LABEL_DIR, exist_ok=True)
vrt_options = gdal.BuildVRTOptions(separate=True)
for label_set in zip(man_ims, otsu_ims, kmeans_ims, gmm_ims):
    date_tag = label_set[0][-14:-4]  # 'YYYY-MM-DD' taken from the end of the manual label file name
    vrt = gdal.BuildVRT(os.path.join(COMBINED_LABEL_DIR, f'labels_{date_tag}.vrt'), list(label_set), options=vrt_options)
    vrt = None  # drop the reference so the VRT is written to disk

combined_labels = [os.path.join(COMBINED_LABEL_DIR, file) for file in os.listdir(COMBINED_LABEL_DIR) if file.endswith('.vrt')]
combined_labels = sorted(combined_labels, key=lambda x: datetime.strptime(x[-14:-4], '%Y-%m-%d'))

In [None]:
# the Gamma Map filtered data is in EPSG:4326 and not EPSG:32615

# Print and keep the projection codes of the first label stack and of the
# first backscatter image so they can be compared.
label_epsg = get_EPSG(im=combined_labels[0])
orig_epsg = get_EPSG(im=orig_ims[0])

In [None]:
# Bounding box of the first label stack, derived from its geotransform.
src = gdal.Open(combined_labels[0])
geo_transform = src.GetGeoTransform()

# Entries used: [0] x of the upper-left corner, [1] pixel width,
# [3] y of the upper-left corner, [5] pixel height (added to the top to get the bottom).
min_x = geo_transform[0]
max_x = geo_transform[0] + (src.RasterXSize * geo_transform[1])
min_y = geo_transform[3] + (src.RasterYSize * geo_transform[5])
max_y = geo_transform[3]

coords = [min_x, max_x, min_y, max_y]
bbox = [min_x, min_y, max_x, max_y]  # (minx, miny, maxx, maxy)
bbox

In [255]:
# Clip every backscatter raster and its GLCM counterpart to the label extent.
# The output folders are spelled out instead of being rebuilt from
# fixed-width slices of the input paths, which break as soon as the root
# folder or a file name changes length.
CLIP_BACKSCATTER_DIR = '/home/wcc/Desktop/SabineRS/GRD/1_clipped/backscatter'
CLIP_GLCM_DIR = '/home/wcc/Desktop/SabineRS/GRD/1_clipped/glcm'
for out_dir in (CLIP_BACKSCATTER_DIR, CLIP_GLCM_DIR):
    os.makedirs(out_dir, exist_ok=True)

assert len(orig_ims) == len(orig_glcms), 'every backscatter image needs a GLCM counterpart'

# Sort both listings so the positional pairing does not depend on directory order
for im, glcm in zip(sorted(orig_ims), sorted(orig_glcms)):
    clip_raster_by_bbox(im, os.path.join(CLIP_BACKSCATTER_DIR, os.path.basename(im)), bbox)

    clip_raster_by_bbox(glcm, os.path.join(CLIP_GLCM_DIR, os.path.basename(glcm)), bbox)

In [None]:
# Same plot twice: the first original image (before clip), then one clipped image (after clip)
for raster_path in (orig_ims[0], '/home/wcc/Desktop/SabineRS/GRD/1_clipped/backscatter/s1_2020-08-08.tif'):
    plot_vv_vh_with_bbox(raster_path, bbox)

In [257]:
# get the reprojected and clipped data
def _date_key(path):
    """Parse the 'YYYY-MM-DD' stamp that precedes the 4-character extension."""
    return datetime.strptime(path[-14:-4], '%Y-%m-%d')

clip_orig = get_grd('/home/wcc/Desktop/SabineRS/GRD/1_clipped/backscatter')
clip_orig_glcms = get_glcm('/home/wcc/Desktop/SabineRS/GRD/1_clipped/glcm')

# order both lists chronologically
clip_orig = sorted(clip_orig, key=_date_key)
clip_orig_glcms = sorted(clip_orig_glcms, key=_date_key)

**FIXME:** the notebook currently breaks at the next cell; the cause has not been identified yet.

In [273]:
# use gdal.BuildVRT() to create new stacked rasters
# will be one raster for each time stamp that S1 and S2 are close in time
# will include the VV and VH, GLCM bands, and the four label rasters for each date ()
CLIP_COMBINED_DIR = '/home/wcc/Desktop/SabineRS/GRD/1_clipped/combined'
os.makedirs(CLIP_COMBINED_DIR, exist_ok=True)

assert len(clip_orig) == len(clip_orig_glcms), 'every clipped backscatter image needs a GLCM counterpart'

orig_vrt_options = gdal.BuildVRTOptions(separate=True)
for im, glcm in zip(clip_orig, clip_orig_glcms):
    # Name the VRT after the image file itself. A fixed-width slice of the
    # full path can pull part of the parent folder name (and a '/') into the
    # output name.
    stem = os.path.splitext(os.path.basename(im))[0]
    orig_vrt = gdal.BuildVRT(os.path.join(CLIP_COMBINED_DIR, f'{stem}.vrt'), [im, glcm], options=orig_vrt_options)
    orig_vrt = None  # drop the reference so the VRT is written to disk

# List the VRTs from the folder they were written to (the original listed the
# backscatter folder, which holds no .vrt files) and sort the result so it
# can be paired by position.
combined_grd = sorted(os.path.join(CLIP_COMBINED_DIR, file) for file in os.listdir(CLIP_COMBINED_DIR) if file.endswith('.vrt'))

# 2. Register Sentinel-1 Imagery to Sentinel-2 labeled data?

In [None]:
# get PCA for SAR imagery based on VV and VH bands only (no GLCM)
pcapath = '/home/wcc/Desktop/SabineRS/GRD/1_clipped/combined/pca/'
os.makedirs(pcapath, exist_ok=True)

# PCA for each image
for im in combined_grd:
    # Write into pcapath. Joining with a component that starts with '/'
    # (as the original '/pca/...' did) makes os.path.join discard everything
    # before it, so the file would land in a top-level /pca folder.
    # perform_pca writes a GeoTIFF, hence the .tif extension.
    date_tag = im[-14:-4]  # 'YYYY-MM-DD' taken from the end of the VRT file name
    perform_pca(im, os.path.join(pcapath, f'{date_tag}.tif'))


pca_ims = [os.path.join(pcapath, file) for file in os.listdir(pcapath) if file.endswith('.tif')]
pca_ims = sorted(pca_ims, key=lambda x: datetime.strptime(x[-14:-4], '%Y-%m-%d'))

In [None]:
# Register every stacked S1 raster to the Sentinel-2 reference image, using
# the PCA image of the same date to estimate the shift.
REGISTERED_DIR = '/home/wcc/Desktop/SabineRS/GRD/2_registered'
os.makedirs(REGISTERED_DIR, exist_ok=True)

ref_ds = gdal.Open('/home/wcc/Desktop/SabineRS/MSI/RGB_NIR_SWIR1/rgb_average_pca.tif')
ref_im = ref_ds.GetRasterBand(1).ReadAsArray()

# Pair PCA images and stacked rasters through the 'YYYY-MM-DD' stamp at the
# end of their file names rather than by list position: pca_ims is sorted by
# date, while combined_grd is not guaranteed to be in the same order.
grd_by_date = {path[-14:-4]: path for path in combined_grd}

for im in pca_ims:
    grd_path = grd_by_date.get(im[-14:-4])
    if grd_path is None:
        print(f'No stacked raster found for {im}; skipping')
        continue

    pca_ds = gdal.Open(im)
    pca_im = pca_ds.GetRasterBand(1).ReadAsArray()

    warp_matrix = register_images(ref_im, pca_im)
    if warp_matrix is None:
        continue  # register_images already printed the ECC error

    orig_dataset = gdal.Open(grd_path)
    orig_bands = [orig_dataset.GetRasterBand(j + 1).ReadAsArray() for j in range(orig_dataset.RasterCount)]

    transformed_orig = apply_transformation_to_all_bands(orig_bands, warp_matrix, ref_im.shape)

    # The output is a GeoTIFF, so give it a .tif name instead of reusing the
    # .vrt name of the input.
    out_name = os.path.splitext(os.path.basename(grd_path))[0] + '.tif'
    save_multiband_image_as_tiff(os.path.join(REGISTERED_DIR, out_name), transformed_orig, orig_dataset)

# 3. Label the Sentinel-1 data as land or water

In [None]:
# need to match on the closest dates only
# will only end up with 46 pairs for labeling


In [None]:
for i, im in enumerate(combined_labels):
    # TODO: loop body not written yet. A bare `for` header is a SyntaxError,
    # so a placeholder keeps the cell runnable. Presumably this is where each
    # label stack gets matched to its closest S1 date — confirm.
    pass

In [None]:
# Quick look at the stacked file lists (only the last expression is displayed).
# NOTE(review): only `combined_labels` is assigned in the visible notebook;
# `combined_orig` and `combined_filt` are never defined here, so this cell
# presumably raises NameError on a fresh kernel — TODO confirm and rename or remove.
combined_labels
combined_orig
combined_filt

In [None]:
# Stack each clipped image with its GLCM raster into a multi-band VRT, for
# the original and for the filtered collection.
# NOTE(review): `clip_filt` and `clip_filt_glcms` are not assigned anywhere
# in the visible notebook, and the other cells write to `1_clipped`, not
# `2_clipped`. This cell looks like a leftover — confirm before running.

# Build the options once and pass the objects created here. The original
# created them inside the loop but then passed `vrt_options`, a variable
# that only exists if an earlier cell has been run.
orig_vrt_options = gdal.BuildVRTOptions(separate=True)
filt_vrt_options = gdal.BuildVRTOptions(separate=True)

for i, im in enumerate(clip_orig):
    orig_vrt = gdal.BuildVRT(f'/home/wcc/Desktop/SabineRS/GRD/2_clipped/combined/{im[-26:-4]}.vrt', [im, clip_orig_glcms[i]], options=orig_vrt_options)
    orig_vrt = None  # drop the reference so the VRT is written to disk

    filt_vrt = gdal.BuildVRT(f'/home/wcc/Desktop/SabineRS/GRD/2_clipped/combined/{clip_filt[i][-26:-4]}.vrt', [clip_filt[i], clip_filt_glcms[i]], options=filt_vrt_options)
    filt_vrt = None

In [None]:
# Label the S1 pixels as 0 or 1 depending on overlap with labeled image from S2


# 4. Save labeled S1 imagery for training Neural Network

In [None]:
# save the S1 (original and filtered) with all bands in one image for training NN
# include VV, VH, GLCM, and the four classes for each pixel
