In [1]:
import numpy as np
import rasterio
import os
from osgeo import gdal, osr
from shapely.geometry import box
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from shapely.geometry import box
from datetime import datetime
import cv2

# Functions

In [2]:
def get_labels(labelpath):
    """Collect the Sentinel-2 label rasters produced by each classification method.

    The ``otsu``, ``kmeans``, ``gmm`` and ``majority`` sub-folders of
    ``labelpath`` are scanned for ``.tif`` files. Each list is ordered
    chronologically using the ten characters that precede the ``.tif``
    extension, which must be formatted ``YYYY-MM-DD``.

    Parameters
    ----------
    labelpath : str
        Directory holding the four per-method sub-folders.

    Returns
    -------
    tuple of list of str
        ``(otsu_ims, kmeans_ims, gmm_ims, majority_ims)``, each sorted by date.
    """
    def _acquisition_date(path):
        # The date occupies the last 10 characters before the '.tif' suffix.
        return datetime.strptime(path[-14:-4], '%Y-%m-%d')

    def _list_tifs(method):
        names = os.listdir(os.path.join(labelpath, method))
        return [os.path.join(labelpath, f'{method}/{name}') for name in names if name.endswith('.tif')]

    # List every folder first, then sort, so a missing folder is reported
    # before any file name is parsed.
    listings = [_list_tifs(method) for method in ('otsu', 'kmeans', 'gmm', 'majority')]
    otsu_ims, kmeans_ims, gmm_ims, majority_ims = (
        sorted(paths, key=_acquisition_date) for paths in listings
    )

    return otsu_ims, kmeans_ims, gmm_ims, majority_ims

def get_grd(grdpath):
    """Return the ``.tif`` files in ``grdpath`` as full paths, oldest first.

    The sort key is the ``YYYY-MM-DD`` date read from the ten characters
    preceding the ``.tif`` extension of each path.
    """
    tif_paths = (
        os.path.join(grdpath, name)
        for name in os.listdir(grdpath)
        if name.endswith('.tif')
    )
    return sorted(tif_paths, key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'))

def get_glcm(glcmpath):
    """Return the ``.tif`` files in ``glcmpath`` as full paths, oldest first.

    The sort key is the ``YYYY-MM-DD`` date read from the ten characters
    preceding the ``.tif`` extension of each path.
    """
    orig_glcms = []
    for entry in os.listdir(glcmpath):
        if entry.endswith('.tif'):
            orig_glcms.append(os.path.join(glcmpath, entry))

    orig_glcms.sort(key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'))
    return orig_glcms

# Clip each Sentinel-1 image
def clip_sentinel1_image(s1_path, output_path, s2_bounds):
    """Clip a raster to a bounding box and write the result to a new file.

    Parameters
    ----------
    s1_path : str
        Path of the raster to clip.
    output_path : str
        Path of the clipped raster to create.
    s2_bounds : iterable
        Bounds unpacked positionally into ``rasterio.windows.from_bounds``
        (left, bottom, right, top). They are converted to a pixel window with
        the source raster's own transform, so they must be expressed in the
        same coordinates as ``s1_path``.
    """
    with rasterio.open(s1_path) as src:
        # Pixel window corresponding to the bounding box (extent)
        window = rasterio.windows.from_bounds(*s2_bounds, transform=src.transform)

        # Read only the pixels inside the window (all bands)
        clipped_image = src.read(window=window)

        # Take the output size from the array that was actually read: the
        # window built from bounds may be fractional, while the raster
        # metadata must hold integer dimensions matching the written data.
        out_meta = src.meta.copy()
        out_meta.update({
            'height': clipped_image.shape[-2],
            'width': clipped_image.shape[-1],
            'transform': src.window_transform(window)
        })

        # Write the clipped image to a new file
        with rasterio.open(output_path, 'w', **out_meta) as dst:
            dst.write(clipped_image)

def export_s1images(coll, s2_bounds, type):
    """Clip every Sentinel-1 image in ``coll`` to ``s2_bounds``.

    Parameters
    ----------
    coll : iterable of str
        Paths of the Sentinel-1 rasters to clip.
    s2_bounds : iterable
        Bounds forwarded unchanged to ``clip_sentinel1_image``.
    type : str
        'original' or the filter type used in sentinel_one_two.ipynb; used as
        the name of the output sub-folder.

    Notes
    -----
    The output path is built from fixed-width slices of each input path: the
    first 16 characters are taken as the root directory and the last 17
    characters as the file name, giving ``<root>/Clipped/<type>/<file name>``.
    """
    # Iterate over the paths themselves: enumerate() would hand back
    # (index, path) tuples, which cannot be sliced into a path.
    for s1_path in coll:
        output_path = os.path.join(s1_path[:16], f'Clipped/{type}/{s1_path[-17:]}')
        clip_sentinel1_image(s1_path, output_path, s2_bounds)

def plot_vv_vh_with_bbox(image_path, bbox):
    """Plot bands 1 and 2 of a raster side by side with a bounding box outlined.

    Parameters
    ----------
    image_path : str
        Raster to display; band 1 is shown as VV and band 2 as VH.
    bbox : iterable
        ``[min_x, min_y, max_x, max_y]`` of the box to draw, in the same
        coordinates as the raster.
    """
    with rasterio.open(image_path) as src:
        vv = src.read(1)  # VV is in the first band
        vh = src.read(2)  # VH is in the second band
        # imshow's extent is the outer edge of the image, so use the dataset
        # bounds rather than the centres of the corner pixels (which would
        # shrink the image by half a pixel on every side).
        bounds = src.bounds

    extent = [bounds.left, bounds.right, bounds.bottom, bounds.top]

    # Outline of the bounding box as x / y coordinate sequences
    x, y = box(*bbox).exterior.xy

    fig, ax = plt.subplots(1, 2, figsize=(14, 6))  # one panel per band

    for axis, band, band_name in zip(ax, (vv, vh), ('VV', 'VH')):
        axis.imshow(band, cmap='gray', extent=extent)
        axis.plot(x, y, color='red', linewidth=2, label="Sentinel-2 Coverage")
        axis.set_title(f'{band_name} Band with Bounding Box')
        axis.set_xlabel('Easting (meters)')
        axis.set_ylabel('Northing (meters)')
        axis.xaxis.set_major_locator(mticker.MaxNLocator(5))  # Reduce axis ticks
        axis.yaxis.set_major_locator(mticker.MaxNLocator(5))
        axis.legend(loc='lower right')

    # Show the plot with layout adjustments
    plt.tight_layout()
    plt.show()

def get_EPSG(im):
    """Print and return the authority code of a raster's projection.

    The raster's projection WKT is loaded into an ``osr.SpatialReference`` and
    the second value of its ``AUTHORITY`` attribute is read.

    Parameters
    ----------
    im : str
        Path of the raster to inspect.

    Returns
    -------
    The value returned by ``GetAttrValue('AUTHORITY', 1)``.
    """
    dataset = gdal.Open(im)

    srs = osr.SpatialReference()
    srs.ImportFromWkt(dataset.GetProjection())

    epsg_code = srs.GetAttrValue('AUTHORITY', 1)
    print(epsg_code)
    return epsg_code

def reproject_raster(input_raster, output_raster, target_crs='EPSG:32615'):
    """Warp ``input_raster`` into ``target_crs`` and write it to ``output_raster``.

    Parameters
    ----------
    input_raster : str
        Source raster passed to ``gdal.Warp``.
    output_raster : str
        Destination passed to ``gdal.Warp``.
    target_crs : str, optional
        Destination spatial reference (``dstSRS``); defaults to 'EPSG:32615'.
    """
    reprojection = gdal.WarpOptions(dstSRS=target_crs)
    gdal.Warp(output_raster, input_raster, options=reprojection)

def clip_raster_by_bbox(input_raster, output_raster, bbox):
    """Clip ``input_raster`` to a bounding box with ``gdal.Translate``.

    Parameters
    ----------
    input_raster : str
        Source raster.
    output_raster : str
        Destination of the clipped raster.
    bbox : iterable
        ``(min_x, min_y, max_x, max_y)`` of the area to keep.
    """
    left, bottom, right, top = bbox

    # projWin is ordered upper-left then lower-right: [min_x, max_y, max_x, min_y]
    window = [left, top, right, bottom]
    subset = gdal.TranslateOptions(projWin=window)

    gdal.Translate(output_raster, input_raster, options=subset)


def perform_pca(image_path, output_pca_path):
    """Reduce bands 1 and 2 of a raster to one principal component and save it.

    The first principal component is computed with OpenCV, min-max normalised
    to 0-255, cast to ``uint8`` and written as the single band of a new
    GeoTIFF (created with a Float32 band type) that copies the projection and
    geotransform of the input.

    Parameters
    ----------
    image_path : str
        Raster whose first two bands are used.
    output_pca_path : str
        Path of the single-band GeoTIFF to create.
    """
    dataset = gdal.Open(image_path)

    # Stack bands 1 and 2 into one (bands, rows, cols) array
    bands_array = np.stack(
        [dataset.GetRasterBand(index).ReadAsArray() for index in (1, 2)],
        axis=0,
    )
    n_bands, n_rows, n_cols = bands_array.shape

    # One row per pixel and one column per band, as float32 for OpenCV PCA
    flattened_image = bands_array.reshape(n_bands, -1).T
    flattened_image = flattened_image.astype(np.float32)

    # Keep only the first principal component
    mean, eigenvectors = cv2.PCACompute(flattened_image, mean=None, maxComponents=1)
    projected = cv2.PCAProject(flattened_image, mean, eigenvectors)

    # Back to image layout, then stretch to the 0-255 range
    pca_image = projected.reshape(n_rows, n_cols)
    pca_image_normalized = cv2.normalize(pca_image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # Write the result with the georeferencing of the input raster
    driver = gdal.GetDriverByName('GTiff')
    output = driver.Create(output_pca_path, dataset.RasterXSize, dataset.RasterYSize, 1, gdal.GDT_Float32)
    output.SetProjection(dataset.GetProjection())
    output.SetGeoTransform(dataset.GetGeoTransform())
    output.GetRasterBand(1).WriteArray(pca_image_normalized)
    output.FlushCache()  # Ensure data is written to disk
    output = None

def register_images(reference_image, target_image):
    """Estimate the warp that aligns ``target_image`` with ``reference_image``.

    Runs OpenCV's ECC algorithm with the translation-only motion model
    (``cv2.MOTION_TRANSLATION``), starting from an identity 2x3 matrix and
    stopping after 5000 iterations or once the update falls below 1e-6.

    Returns
    -------
    The warp matrix produced by ``cv2.findTransformECC``, or ``None`` when
    OpenCV raises ``cv2.error`` (the error message is printed).
    """
    # Identity 2x3 matrix as the starting estimate
    initial_warp = np.eye(2, 3, dtype=np.float32)

    # Stop on either the iteration cap or the convergence threshold
    stop_criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5000, 1e-6)

    try:
        _, warp_matrix = cv2.findTransformECC(
            reference_image, target_image, initial_warp, cv2.MOTION_TRANSLATION, stop_criteria
        )
    except cv2.error as e:
        print(f"Error in ECC: {e}")
        return None

    return warp_matrix

def apply_transformation_to_all_bands(target_bands, warp_matrix, image_shape, output_dtype=np.float32):
    """Warp every band with the same affine matrix.

    Parameters
    ----------
    target_bands : iterable of numpy.ndarray
        Bands to transform.
    warp_matrix : numpy.ndarray
        2x3 matrix passed to ``cv2.warpAffine``; it is applied with
        ``cv2.WARP_INVERSE_MAP`` and linear interpolation.
    image_shape : sequence
        ``(rows, cols)`` of the output; passed to OpenCV as ``(cols, rows)``.
    output_dtype : numpy dtype, optional
        Type each warped band is cast to (default ``np.float32``).

    Returns
    -------
    list of numpy.ndarray
        The warped bands, in input order.
    """
    def _warp(band):
        # Warp as float32, then cast to the requested output type
        warped = cv2.warpAffine(
            band.astype(np.float32),
            warp_matrix,
            (image_shape[1], image_shape[0]),
            flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP,
        )
        return warped.astype(output_dtype)

    return [_warp(band) for band in target_bands]

def save_multiband_image_as_tiff(output_path, transformed_bands, reference_dataset, gdal_dtype=gdal.GDT_Float32):
    """Write a list of 2-D arrays as the bands of a new GeoTIFF.

    Parameters
    ----------
    output_path : str
        Path of the GeoTIFF to create.
    transformed_bands : sequence of numpy.ndarray
        One array per output band, written in order starting at band 1.
    reference_dataset : gdal.Dataset
        Supplies the raster size, projection and geotransform of the output.
    gdal_dtype : int, optional
        GDAL data type of the output bands (default ``gdal.GDT_Float32``).
    """
    n_bands = len(transformed_bands)
    out_dataset = gdal.GetDriverByName('GTiff').Create(
        output_path,
        reference_dataset.RasterXSize,
        reference_dataset.RasterYSize,
        n_bands,
        gdal_dtype,
    )

    # Copy the georeferencing of the reference dataset
    out_dataset.SetProjection(reference_dataset.GetProjection())
    out_dataset.SetGeoTransform(reference_dataset.GetGeoTransform())

    # GDAL band numbers start at 1
    for band_number, band_array in enumerate(transformed_bands, start=1):
        out_dataset.GetRasterBand(band_number).WriteArray(band_array)

    # Flush data to disk and release the dataset
    out_dataset.FlushCache()
    out_dataset = None

def sar_ratio(impathlist, outpath):
    """Add two dual-polarisation index bands to each VV/VH raster and save it.

    For every path, bands 1 (VV) and 2 (VH) are read as dB values, converted
    to linear scale, and used to compute:

    * ``4 * VH / (VH + VV)`` - the Sentinel-1 radar vegetation index, and
    * ``ln(10 * VV * VH)`` - the Sentinel-1 dual-polarization water index.

    Infinite results of either sign are replaced with NaN. The output is a
    4-band GeoTIFF (VV dB, VH dB, vegetation index, water index) written to
    ``outpath`` under the last 14 characters of the input path.

    Parameters
    ----------
    impathlist : iterable of str
        Rasters whose first two bands are VV and VH in dB.
    outpath : str
        Directory receiving the 4-band rasters.
    """
    for im in impathlist:
        testds = gdal.Open(im)

        vv_dB = testds.GetRasterBand(1).ReadAsArray()
        vh_dB = testds.GetRasterBand(2).ReadAsArray()

        # Convert from dB to linear scale
        vv_linear = 10 ** (vv_dB / 10)
        vh_linear = 10 ** (vh_dB / 10)

        # Division by zero and log(0) are expected for empty pixels; silence
        # the warnings and clean the results up below.
        with np.errstate(divide='ignore', invalid='ignore'):
            ratio1 = (4 * vh_linear) / (vh_linear + vv_linear)  # sentinel-1 radar veg index
            ratio2 = np.log(10 * vv_linear * vh_linear)  # sentinel-1 dual-polarization water index

        # log(0) yields -inf, so test for both signs of infinity rather than
        # comparing against +inf only.
        ratio1[np.isinf(ratio1)] = np.nan
        ratio2[np.isinf(ratio2)] = np.nan

        save_multiband_image_as_tiff(os.path.join(outpath, im[-14:]), [vv_dB, vh_dB, ratio1, ratio2], testds)

        # Release the input dataset before moving on to the next image
        testds = None


# 1. Clip the Sentinel-1 bands to the same extent as the Sentinel-2 imagery 
- need to clip using the bbox from first image
- need to stack all GRD and GLCM bands into a single .tif or .vrt for respective dates
- stack/include the four labels raster bands as well

In [None]:
# Root directory of the project data. Later cells build their input and output
# paths from `work_dir`, so exactly one of the lines below must be active.

############### WSL #########################
# work_dir = '/mnt/d/SabineRS'

############### linux #########################
work_dir = '/home/wcc/Desktop/SabineRS/'

In [4]:
# set the directory for where your images are located
# get_labels returns four date-sorted lists in the order
# (otsu, kmeans, gmm, majority); get_grd and get_glcm each return one
# date-sorted list of .tif paths.

############### WSL #########################
# otsu_ims, kmeans_ims, gmm_ims, majority_ims = get_labels('/mnt/d/SabineRS/s2classifications')
# orig_ims = get_grd('/mnt/d/SabineRS/GRD/0_initial/backscatter')
# orig_glcms = get_glcm('/mnt/d/SabineRS/GRD/0_initial/glcm')

############### Linux #########################
otsu_ims, kmeans_ims, gmm_ims, majority_ims = get_labels('/home/wcc/Desktop/SabineRS/MSI/s2classifications')
orig_ims = get_grd('/home/wcc/Desktop/SabineRS/GRD/0_initial/backscatter')
orig_glcms = get_glcm('/home/wcc/Desktop/SabineRS/GRD/0_initial/glcm')

In [5]:
# Registered Sentinel-2 RGB/NIR/SWIR1 rasters, ordered by the YYYY-MM-DD date
# that precedes the '.tif' extension in each file name.
regrgbpath = '/home/wcc/Desktop/SabineRS/MSI/RGB_NIR_SWIR1/registered'
reg_rgb_ims = sorted(
    (os.path.join(regrgbpath, name) for name in os.listdir(regrgbpath) if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

In [None]:
# the Gamma Map filtered data is in EPSG:4326 and not EPSG:32615

# get_EPSG prints and returns the projection authority code of each raster.
# Compare the two printed values: the bounding box used for clipping is taken
# from the Sentinel-2 raster's geotransform, so both rasters need to share a
# coordinate system for it to select the intended area.
label_epsg = get_EPSG(reg_rgb_ims[0])
orig_epsg = get_EPSG(orig_ims[0])

In [None]:
# Bounding box of the first registered Sentinel-2 raster, derived from its
# geotransform: origin plus (raster size * pixel size) along each axis.
src = gdal.Open(reg_rgb_ims[0])
geo_transform = src.GetGeoTransform()

x_min = geo_transform[0]
x_max = geo_transform[0] + (src.RasterXSize * geo_transform[1])
# presumably geo_transform[5] is negative (north-up raster), making this the
# smaller y value - verify if a raster with a different orientation is used
y_min = geo_transform[3] + (src.RasterYSize * geo_transform[5])
y_max = geo_transform[3]

coords = [x_min, x_max, y_min, y_max]
bbox = [x_min, y_min, x_max, y_max]  # (min_x, min_y, max_x, max_y)
bbox

In [8]:
# Clip every backscatter raster, and the GLCM raster at the same list index,
# to the Sentinel-2 bounding box.
for i, im in enumerate(orig_ims):
    # The last 17 characters of the path are used as the output file name.
    backscatter_out = os.path.join(work_dir, f'GRD/1_clipped/backscatter/{im[-17:]}')
    clip_raster_by_bbox(im, backscatter_out, bbox)

    # The last 22 characters of the GLCM path are appended directly under
    # 1_clipped - presumably the parent folder name plus the file name.
    glcm_im = orig_glcms[i]
    glcm_out = os.path.join(work_dir, f'GRD/1_clipped/{glcm_im[-22:]}')
    clip_raster_by_bbox(glcm_im, glcm_out, bbox)

In [None]:
# Visual check: plot bands 1 and 2 of one clipped backscatter image with the
# Sentinel-2 bounding box outlined.
# The active call indexes orig_ims[120], so it needs at least 121 images.
# i = 0

# plot_vv_vh_with_bbox(orig_ims[i], bbox) # before clip
# plot_vv_vh_with_bbox(f'/mnt/d/SabineRS/GRD/1_clipped/backscatter/{orig_ims[5][-17:]}', bbox) # after clip
plot_vv_vh_with_bbox(f'/home/wcc/Desktop/SabineRS/GRD/1_clipped/backscatter/{orig_ims[120][-17:]}', bbox) # after clip

# Register Imagery

In [10]:
# get the reprojected and clipped data
# get_grd and get_glcm already return their paths sorted by the date in the
# file name, so no further sorting is needed here.
clip_orig = get_grd(os.path.join(work_dir, 'GRD/1_clipped/backscatter'))
clip_orig_glcms = get_glcm(os.path.join(work_dir, 'GRD/1_clipped/glcm'))

Having problems with PCA registration. I think it is just because my GRD and GLCM images have a smaller bbox than my S2 classifications. I will get images with a larger bbox and retry.

In [11]:
# get PCA for SAR imagery based on VV and VH bands only (no GLCM)
pcapath = os.path.join(work_dir,'GRD/1_clipped/pca')

# One single-band PCA raster per clipped image, named by the last 14
# characters of the input path
for im in clip_orig:
    pca_out = os.path.join(work_dir, f'GRD/1_clipped/pca/{im[-14:]}')
    perform_pca(im, pca_out)

# Collect the PCA rasters in date order
pca_ims = sorted(
    (os.path.join(pcapath, name) for name in os.listdir(pcapath) if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

In [12]:
# Reference image: band 1 of the PCA of the average Sentinel-2 raster
ref_ds = gdal.Open(os.path.join(work_dir, 'MSI/RGB_NIR_SWIR1/rgb_average_pca.tif'))
ref_im = ref_ds.GetRasterBand(1).ReadAsArray()

# Register each SAR PCA image to the reference, then apply the resulting warp
# to the backscatter and GLCM rasters found at the same list index.
for i, im in enumerate(pca_ims):
    pca_ds = gdal.Open(im)
    pca_im = pca_ds.GetRasterBand(1).ReadAsArray()

    warp_matrix = register_images(ref_im, pca_im)

    # register_images returns None when ECC fails; that date is skipped
    if warp_matrix is None:
        continue

    orig_dataset = gdal.Open(clip_orig[i])
    orig_bands = [orig_dataset.GetRasterBand(band_number).ReadAsArray()
                  for band_number in range(1, orig_dataset.RasterCount + 1)]

    glcm_ds = gdal.Open(clip_orig_glcms[i])
    glcm_bands = [glcm_ds.GetRasterBand(band_number).ReadAsArray()
                  for band_number in range(1, glcm_ds.RasterCount + 1)]

    transformed_orig = apply_transformation_to_all_bands(orig_bands, warp_matrix, ref_im.shape)
    transformed_glcm = apply_transformation_to_all_bands(glcm_bands, warp_matrix, ref_im.shape)

    backscatter_out = os.path.join(work_dir, f'GRD/2_registered/backscatter/{clip_orig[i][-17:]}')
    glcm_out = os.path.join(work_dir, f'GRD/2_registered/glcm/{clip_orig_glcms[i][-17:]}')

    save_multiband_image_as_tiff(backscatter_out, transformed_orig, orig_dataset)
    save_multiband_image_as_tiff(glcm_out, transformed_glcm, glcm_ds)

# Generate VV/VH ratio bands

In [22]:
# Registered backscatter and GLCM rasters, each list ordered by the date in
# the file name.
regpath = os.path.join(work_dir,'GRD/2_registered')

reg_grd_ims = sorted(
    (os.path.join(regpath, f'backscatter/{name}')
     for name in os.listdir(os.path.join(regpath, 'backscatter'))
     if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

reg_glcm_ims = sorted(
    (os.path.join(regpath, f'glcm/{name}')
     for name in os.listdir(os.path.join(regpath, 'glcm'))
     if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

In [28]:
# vv, vh, sentinel-1 radar veg index, and sentinel-1 dual-pol water index
# Each registered backscatter raster is written out again as a 4-band raster,
# then the results are listed in date order.

# ratio_dir = '/mnt/d/SabineRS/GRD/3_ratio'
ratio_dir = '/home/wcc/Desktop/SabineRS/GRD/3_ratio'

sar_ratio(reg_grd_ims, ratio_dir)

ratio_ims = sorted(
    (os.path.join(ratio_dir, name) for name in os.listdir(ratio_dir) if name.endswith('.tif')),
    key=lambda path: datetime.strptime(path[-14:-4], '%Y-%m-%d'),
)

# Fun plots

In [None]:
# Overlay the Sentinel-2 majority classification on one SAR band for three
# dates. 'ratio_ims' and 'majority_ims' are the date-sorted path lists built
# in the earlier cells.
# NOTE(review): the SAR images use list positions 0 / 5 / -1 while the label
# images use 0 / 1 / -1 - confirm that each pair shows the same date.
sar_paths = [ratio_ims[0], ratio_ims[5], ratio_ims[-1]]
class_paths = [majority_ims[0], majority_ims[1], majority_ims[-1]]

# Load the SAR band to display
sarbands = []
sardates = []
for im in sar_paths:
    with rasterio.open(im) as src:
        sarbands.append(src.read(4))  # 1 = VV, 2 = VH, 3 = RVI, 4 = DPWI
    sardates.append(im[-14:-4])

# Load the classification rasters (band 1 holds the labels)
classes = []
for im in class_paths:
    with rasterio.open(im) as src:
        classes.append(src.read(1))
        # The extent of the last label raster is used for every panel.
        bounds = src.bounds

# imshow expects the outer edges of the image as [left, right, bottom, top],
# so use the dataset bounds rather than the centres of the corner pixels.
extent = [bounds.left, bounds.right, bounds.bottom, bounds.top]

# Create the figure and subplots
fig, ax = plt.subplots(1, 3, figsize=(15, 6), dpi=1000)

for i in range(3):
    ax[i].imshow(sarbands[i], cmap='gray', extent=extent)
    # Overlay the classification with transparency
    ax[i].imshow(classes[i], cmap='viridis', alpha=0.15, extent=extent)

    # Set titles and labels
    ax[i].set_title(sardates[i])
    ax[i].set_xlabel('Easting (meters)')
    ax[i].set_ylabel('Northing (meters)')
    ax[i].xaxis.set_major_locator(mticker.MaxNLocator(5))
    ax[i].yaxis.set_major_locator(mticker.MaxNLocator(5))

# Adjust layout and display
plt.tight_layout()
plt.show()

# All required images for training the classification algorithm should now be saved separately. All images have been registered to the PCA of the average Sentinel-2 surface reflectance image for the entire time series.

- Land-Water Classifications from Sentinel-2 data saved as a 4-band raster (NDVI >0.0 and NDWI <0.0 mask, Otsu threshold based mask on NDVI and NDWI, KMeans on the RGB-NIR-SWIR1 bands, and GMM on RGB-NIR-SWIR1 bands), saved as .vrt files
- VV and VH from Sentinel-1 data saved as a 2-band raster, saved as .tif files
- GLCM bands (contrast, asm, diss, idm, corr, var, ent for both VV and VH bands) saved as a 14-band raster, saved as .tif files