The main goal of this notebook is to use Google Earth Engine to identify and create features that represent the shoreline/waterline of the wetland projects, based on different parameters such as NDWI, VV and VH backscatter, and individual multispectral bands.

In [None]:
## before anything you need to visit the site below and make sure you have a google earth engine account
## this is so you can access Sentinel-1 GRD and Sentinel-2 TOA and SR products, as well as other sensor packages and data types

## visit the website below to set up an Earth Engine account, enable a cloud project, and enable the ee API
## https://developers.google.com/earth-engine/cloud/earthengine_cloud_project_setup#get-access-to-earth-engine

In [None]:
# install dependicies, can skip if you have a python environment with these already

!pip install earthengine-api
!pip install geemap
!pip install matplotlib
!pip install numpy
!pip install pandas

In [None]:
import ee
import geemap
import geemap.colormaps as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd
import math
import json

In [None]:
## Authenticate with Google Earth Engine.
## Only needs to be run once; after authenticating you only need to
## initialize (next cell) at the start of each session.

## https://developers.google.com/earth-engine/guides/auth
ee.Authenticate()

In [None]:
## Initialize the Earth Engine client with the cloud project you made during initial setup.
ee.Initialize(project = 'ee-claycaldgsl') ## enter your own project name here as a string
# NOTE(review): the author suspected the project argument can be left empty and still work — unverified

# Some functions for a bit easier mapping
super simple for now, might make them better later

In [None]:
## Add a true-colour (B4/B3/B2) layer for a single image to the map.
def add_rgb_to_map(image, map_object):
    """
    Add a true-colour Sentinel-2 layer to a geemap Map.

    The layer is named '<YYYY-MM-dd>_rgb'. The date is read from the image's
    'date' property and pulled client-side with getInfo().

    Args:
        image: ee.Image carrying a 'date' property and bands B4, B3 and B2
        map_object: geemap.Map that receives the layer

    Returns:
        None; the layer is added to map_object in place
    """
    date_label = ee.Date(image.get('date')).format('YYYY-MM-dd').getInfo()
    rgb_vis = {'min': 0, 'max': 2000, 'bands': ['B4', 'B3', 'B2']}
    layer_name = f'{date_label}_rgb'
    map_object.addLayer(image, rgb_vis, layer_name)

## Add a single spectral-index band of an image to the map.
def add_ind_to_map(image, map_object, band):
    """
    Add one spectral-index band of an image to a geemap Map.

    The layer is named '<YYYY-MM-dd>_<band>' and stretched from -1 to 1 with a
    palette chosen per index. A band name that is not one of the supported
    indices adds nothing (the date is still fetched).

    Args:
        image: ee.Image carrying a 'date' property and the requested band
        map_object: geemap.Map that receives the layer
        band: string. 'NDVI', 'NDWI', 'MSAVI2', 'BSI' are appropriate

    Returns:
        None; the layer is added to map_object in place
    """
    # (index band, attribute of cm.palettes holding its colour ramp)
    palette_by_index = (
        ('NDWI', 'ndwi'),
        ('NDVI', 'ndvi'),
        ('MSAVI2', 'RdYlGn'),
        ('BSI', 'Greens'),
    )

    date_label = ee.Date(image.get('date')).format('YYYY-MM-dd').getInfo()

    for index_name, palette_attr in palette_by_index:
        if band == index_name:
            vis = {
                'min': -1,
                'max': 1,
                'bands': band,
                # looked up lazily so only the matching palette is touched
                'palette': getattr(cm.palettes, palette_attr),
            }
            map_object.addLayer(image, vis, f'{date_label}_{band}')
            break

## ndvi = Normalized Difference Vegetation Index, good for vegetation health and cover
## ndwi = Normalized Difference Water Index, good for identifying water bodies and moisture in surface
## msavi2 = Modified Soil-Adjusted Vegetation Index 2
## bsi = Bare-Soil Index
def s2_10m_target_indices(image, reflectance_scale=0.0001):
    """
    Function to calculate Spectral Indices from Sentinel-2 imagery

    NDVI, NDWI and BSI are ratios of band sums/differences, so they do not
    depend on how the reflectance is scaled. MSAVI2 contains an additive
    constant and is only meaningful on reflectance in the 0-1 range, so B8 and
    B4 are multiplied by reflectance_scale before the MSAVI2 expression is
    evaluated.

    Args:
        image: ee.Image with bands B2, B3, B4, B8 and B11
        reflectance_scale: float. Factor converting stored band values to 0-1
            reflectance for MSAVI2. The default 0.0001 is the scale documented
            for the COPERNICUS/S2_SR_HARMONIZED collection used in this
            notebook; pass 1 if the image is already in reflectance units.

    Returns:
        ee.Image: the input image with float bands 'NDVI', 'NDWI', 'MSAVI2'
        and 'BSI' added
    """

    # Calculate NDVI: (NIR - RED) / (NIR + RED)
    ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI').toFloat()
    # Calculate NDWI: (GREEN - NIR) / (GREEN + NIR)
    ndwi = image.normalizedDifference(['B3', 'B8']).rename('NDWI').toFloat()

    # Calculate MSAVI2 on 0-1 reflectance (the formula is not scale-invariant)
    nir_reflectance = image.select('B8').multiply(reflectance_scale)
    red_reflectance = image.select('B4').multiply(reflectance_scale)
    msavi2 = image.expression(
        '((2 * NIR + 1) - ((2 * NIR + 1) ** 2 - 8 * (NIR - RED)) ** 0.5) / 2',
        {
            'NIR': nir_reflectance,
            'RED': red_reflectance
        }
    ).rename('MSAVI2').toFloat()

    # Calculate the Bare-Soil Index (BSI)
    bsi = image.expression(
        '((SWIR1 + RED) - (NIR + BLUE)) / ((SWIR1 + RED) + (NIR + BLUE))',
        {
            'RED': image.select('B4'),
            'NIR': image.select('B8'),
            'BLUE': image.select('B2'),
            'SWIR1': image.select('B11').resample(mode = 'bicubic') ### scale of 10m specified when exporting images
        }
    ).rename('BSI').toFloat()

    # Add all indices as new bands to the image
    return image.addBands([ndvi, ndwi, msavi2, bsi])

## Collects Sentinel-2 SR (surface reflectance, harmonized) imagery over the AOI
def get_sentinel_imagery(aoi, start_date, end_date, s2_cloud_cov, orbit):
    """
    Retrieve Sentinel-2 imagery from Google Earth Engine.

    The 'COPERNICUS/S2_SR_HARMONIZED' collection is restricted to the AOI and
    date window, every image is tagged with a 'date' property formatted as
    'YYYYMMdd', images whose CLOUDY_PIXEL_PERCENTAGE is not below s2_cloud_cov
    are dropped, the result is sorted by that 'date' property and finally each
    image is clipped to the AOI.

    Args:
        aoi: Earth Engine geometry/feature used for filterBounds and clip (the
            notebook passes the polygon drawn on the first interactive map)
        start_date: string. Format 'YYYY-MM-DD'
        end_date: string. Format 'YYYY-MM-DD'
        s2_cloud_cov: int. Exclusive upper limit on CLOUDY_PIXEL_PERCENTAGE
        orbit: string. 'ASCENDING' or 'DESCENDING'. Accepted for interface
            compatibility but not used by this function.

    Returns:
        ee.ImageCollection containing the Sentinel-2 imagery
    """

    def _tag_date(img):
        # compact date string used for sorting and for map layer names
        return img.set('date', ee.Date(img.date()).format('YYYYMMdd'))

    def _clip_to_aoi(img):
        return img.clip(aoi)

    window_start = ee.Date(start_date)
    window_end = ee.Date(end_date)
    below_cloud_limit = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', s2_cloud_cov)

    ## Sentinel-2 Surface Reflectance Harmonized ImageCollection
    collection = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    collection = collection.filterBounds(aoi)
    collection = collection.filterDate(window_start, window_end)
    collection = collection.map(_tag_date)
    collection = collection.filter(below_cloud_limit)
    collection = collection.sort('date')

    ## Clip all images in the collection to the AOI
    return collection.map(_clip_to_aoi)

def get_spectral_indices(imagecollection):
    """
    Compute the spectral indices for every image of a collection.

    Maps s2_10m_target_indices over the collection and keeps only the index
    bands, dropping the original reflectance bands.

    Args:
        imagecollection: ee.ImageCollection

    Returns:
        ee.ImageCollection whose images hold the 'NDVI', 'NDWI', 'MSAVI2' and
        'BSI' bands
    """
    index_bands = ['NDVI', 'NDWI', 'MSAVI2', 'BSI']
    with_indices = imagecollection.map(s2_10m_target_indices)
    return with_indices.select(index_bands)

def mask_s2_collections(s2_collections, qa_band='cs_cdf', clear_threshold=0.60):
    """
    Mask clouds in multiple Sentinel-2 collections using Cloud Score+

    Each collection is linked with the Cloud Score+ collection to attach the
    QA band, pixels whose QA value is below clear_threshold are masked, and
    only bands B2, B3, B4, B8 and B11 are kept (the QA band is dropped again).

    Args:
        s2_collections: List of ee.ImageCollection objects (Sentinel-2 collections)
        qa_band: String, either 'cs' or 'cs_cdf'
        clear_threshold: Float between 0-1; pixels with a QA value greater than
            or equal to it are kept

    Returns:
        List of masked ee.ImageCollection objects, in the same order as the input
    """
    cloud_score_plus = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED')
    kept_bands = ['B2', 'B3', 'B4', 'B8', 'B11']

    def _keep_clear_pixels(img):
        """Mask every pixel whose QA score is below the threshold."""
        is_clear = img.select(qa_band).gte(clear_threshold)
        return img.updateMask(is_clear)

    return [
        coll.linkCollection(cloud_score_plus, [qa_band])
            .map(_keep_clear_pixels)
            .select(kept_bands)
        for coll in s2_collections
    ]

## function to get the date of each image in the image collection
def get_date(image):
    """
    Wrap an image's acquisition date in a geometry-less feature.

    Intended to be mapped over an ee.ImageCollection so the dates can be
    gathered into a list.

    Args:
        image: ee.Image

    Returns:
        ee.Feature with no geometry and a single 'date' property formatted as
        'YYYY-MM-dd'
    """
    date_string = image.date().format('YYYY-MM-dd')
    return ee.Feature(None, {'date': date_string})

def get_histogram(image, scale, bucket_num, band_name, region=None):
    """
    Used to create histograms for the image passed

    Args:
        image: ee.Image, NDWI image to determine the shoreline from
        scale: int, scale to estimate the histogram from, typically 10 to match the resolution of the RGB imagery
        bucket_num: int, number of buckets to put the data into for histogram
        band_name: str, the name of your target band in the image
        region: optional Earth Engine geometry over which the histogram is
            computed. When omitted, the notebook-level `aoi` variable is used,
            which must already be defined.

    Returns:
        dict: the histogram reducer output for band_name, fetched client-side
        with getInfo()
    """
    if region is None:
        # Backward-compatible fallback: earlier versions always read the global AOI
        region = aoi

    # Reduce the image to get a histogram over the region of interest (ROI)
    hist = image.reduceRegion(
        reducer=ee.Reducer.histogram(maxBuckets=bucket_num),  # Adjust the number of buckets as needed
        geometry=region,
        scale=scale,  # Adjust based on image resolution
        maxPixels=1e8
    )

    # Get the histogram data for the requested band
    return ee.Dictionary(hist.get(band_name)).getInfo()

def filter_masked_collections(masked_collections, clear_threshold=0.60, scale=10):
    """
    Drop images that lost too many pixels to masking.

    For every image a 'valid_fraction' property is computed as the sum of the
    image mask over the image footprint divided by the footprint area expressed
    in scale x scale pixels. Images whose valid_fraction is below
    clear_threshold are removed from their collection.

    Args:
        masked_collections: list of ee.ImageCollection (already masked)
        clear_threshold: float, minimum fraction of valid pixels an image needs to be kept
        scale: scale in m used for the pixel count, 10 for sentinel-2

    Returns:
        list of ee.ImageCollection, one per input collection, whose images
        carry the 'valid_fraction' property
    """
    def _tag_valid_fraction(image):
        footprint = image.geometry()

        # Sum of the mask over the footprint; the first entry of the reducer output is used
        mask_sums = image.mask().reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=footprint,
            scale=scale,
            maxPixels=1e13
        )
        unmasked_count = mask_sums.values().get(0)

        # Estimated pixel count if nothing were masked
        nominal_count = footprint.area().divide(scale * scale)

        fraction = ee.Number(unmasked_count).divide(nominal_count)
        return image.set('valid_fraction', fraction)

    enough_valid = ee.Filter.gte('valid_fraction', clear_threshold)

    return [
        coll.map(_tag_valid_fraction).filter(enough_valid)
        for coll in masked_collections
    ]

def plot_subaerial_change(masked_pairs, site_name, interpolate=True):
    """
    Function to estimate and plot the subaerial area changes of a creation site over time

    Land-only images are derived with extract_land, their area is computed with
    calculate_land_area, and the resulting time series is plotted together with
    vertical marker lines for a fixed set of hurricanes.

    Args:
        masked_pairs: dict mapping a site name to a two-item list
            [band collection, spectral-index collection]; the index collection
            must provide the 'NDWI' and 'NDVI' bands
        site_name: key of masked_pairs identifying the creation site to plot
        interpolate: when truthy, missing (NaN) area values are filled by
            linear interpolation before plotting

    Returns:
        None; the figure is shown with plt.show()
    """
    band_coll = masked_pairs[site_name][0]
    index_coll = masked_pairs[site_name][1]

    # Extract land images and calculate land area
    land_images = extract_land(
        band_coll,
        index_coll.select(['NDWI']),
        index_coll.select(['NDVI'])
    )
    land_area_images = land_images.map(calculate_land_area)
    land_area_list = land_area_images.aggregate_array('total_land_area').getInfo()
    dates_list = land_area_images.aggregate_array('system:time_start').getInfo()

    # Unique acquisition timestamps (epoch milliseconds), oldest first
    all_dates = sorted(set(dates_list))

    # Series indexed by timestamp; entries stay NaN where no area value is available
    site_land_area = pd.Series(np.nan, index=all_dates, dtype=float)
    for date, area in zip(dates_list, land_area_list):
        site_land_area[date] = area

    if interpolate:
        # Interpolate to fill NaN values
        site_land_area = site_land_area.interpolate(method='linear')

    area_column = f'Subaerial Land Area (sq km), {site_name}'
    df = pd.DataFrame({
        # Epoch milliseconds are interpreted as UTC and truncated to the calendar day
        'Date': pd.to_datetime(all_dates, unit='ms').normalize(),
        area_column: site_land_area.values,
    })

    # Storm markers: (name, date, line colour)
    # NOTE(review): NHC lists Beryl's Texas landfall as 2024-07-08 and Francine's
    # Louisiana landfall as 2024-09-11 — confirm the two 2024 dates below are intended.
    storms = (
        ('Hurricane Laura', '2020-08-27', '#1f77b4'),     # Dark Blue
        ('Hurricane Delta', '2020-10-09', '#2ca02c'),     # Dark Green
        ('Hurricane Zeta', '2020-10-28', '#d62728'),      # Dark Red
        ('Hurricane Ida', '2021-08-29', '#ff7f0e'),       # Dark Orange
        ('Hurricane Beryl', '2024-07-24', '#9467bd'),     # Purple
        ('Hurricane Francine', '2024-09-14', '#000000'),  # Black
    )

    # Plot the land area over time for the site
    plt.figure(figsize=(30, 6), dpi=300)
    plt.plot(df['Date'], df[area_column], marker='o')

    # Adding hurricane event lines with labels for the legend
    for storm, date_string, color in storms:
        plt.axvline(
            x=datetime.strptime(date_string, '%Y-%m-%d'),
            color=color,
            linestyle='--',
            linewidth=1.5,
            label=storm
        )

    # Labels and formatting
    plt.xlabel('Date', fontweight='bold', fontsize=14)
    plt.ylabel('Area (sq km)', fontweight='bold', fontsize=14)
    plt.title(f'Subaerial Land Area Changes Over Time, {site_name}', fontweight='bold', fontsize=16)
    plt.xticks(rotation=45, fontweight='bold')
    plt.yticks(fontweight='bold')
    plt.grid(True)

    # Set legend with storm names
    plt.legend(loc='upper left', fontsize='large')

    # Show plot
    plt.tight_layout()
    plt.show()

def extract_land(rgb_coll, ndwi_coll, ndvi_coll):
    """
    Keep only the land-classified pixels of every image.

    The three collections are walked in parallel by position, so they must hold
    the same images in the same order. A pixel is kept when its NDWI is below 0
    and its NDVI is above 0; every other pixel is masked out. The number of
    images is fetched client-side with getInfo().

    Args:
        rgb_coll: ee.ImageCollection, the imagecollection whose images are masked and returned
        ndwi_coll: ee.ImageCollection, the imagecollection containing the NDWI band for each image
        ndvi_coll: ee.ImageCollection, the imagecollection containing the NDVI band for each image

    Returns:
        ee.ImageCollection containing only the land classified pixels in each image/creation site
    """
    rgb_list = rgb_coll.toList(rgb_coll.size())
    ndwi_list = ndwi_coll.toList(ndwi_coll.size())
    ndvi_list = ndvi_coll.toList(ndvi_coll.size())

    def _land_only(position):
        # NDWI < 0 marks non-water pixels, NDVI > 0 marks land pixels
        not_water = ee.Image(ndwi_list.get(position)).select('NDWI').lt(0.0)
        is_land = ee.Image(ndvi_list.get(position)).select('NDVI').gt(0.0)
        # Both conditions must hold for a pixel to stay unmasked
        return ee.Image(rgb_list.get(position)).updateMask(is_land.And(not_water))

    image_count = ndwi_coll.size().getInfo()
    return ee.ImageCollection([_land_only(position) for position in range(image_count)])

def calculate_land_area(image):
    """
    Function to calculate the area of the remaining land classified pixels in an image

    The area is the sum of per-pixel ground area over the pixels where band B8
    is unmasked, evaluated within the image footprint. The B8 values themselves
    do not enter the result; only the band's mask is used.

    Args:
        image: ee.Image, land-masked image with the NIR band 'B8', should be 10m

    Returns:
        ee.Image: the input image with property 'total_land_area' denoting the
        area of the land pixels in km2
    """
    # 1 where B8 is valid, 0 where it is masked; multiplying by the pixel area
    # (square meters) gives an image that sums to the unmasked area
    land_mask = image.select('B8').mask()
    land_area = land_mask.multiply(ee.Image.pixelArea()).rename('land_area')

    # Sum up the total land area over the image footprint
    total_land_area_m2 = land_area.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=image.geometry(),
        scale=10,  # Sentinel-2 resolution is 10 meters
        maxPixels=1e9
    ).get('land_area')

    # Convert from square meters to square kilometers (1 km² = 1,000,000 m²)
    total_land_area_km2 = ee.Number(total_land_area_m2).divide(1e6)

    # Set the total land area as a property on the image
    return image.set('total_land_area', total_land_area_km2)

# Sabine area of interest

In [None]:
# Load the placement-site boundaries and list the distinct placement types
geojsonfile = '/Users/clayc/Documents/Dissertation/ActivePlacementSites.geojson'
boundaries = gpd.read_file(geojsonfile)
boundaries['PLACEMENTTYPE'].unique().tolist()

In [None]:
# Keep only the sites whose placement type is 'uplandEcoHab'
is_upland_eco_hab = boundaries['PLACEMENTTYPE'] == 'uplandEcoHab'
filtered_gdf = boundaries[is_upland_eco_hab]
filtered_gdf

In [None]:
## Interactive map: shows the placement sites and lets you draw a polygon marking your aoi

## Styling of the placement-site polygons
site_style = {
    'color': 'orange',
    'fillColor': 'orange',
    'fillOpacity': 1,
    'weight': 2,
}

## World view with the hybrid basemap
m = geemap.Map(center=[20, 0], zoom=2, basemap='HYBRID')
m.add_gdf(filtered_gdf, layer_name="Active Placement Sites", style=site_style)

## Drawing tools, then show the map
m.add_draw_control()
display(m)

In [None]:
## Get the drawn features; fail with a clear message if nothing has been drawn yet
if not m.draw_features:
    raise RuntimeError('No drawn features found: draw a polygon on the map above, then re-run this cell.')
draw_features = m.draw_features[0]
## Establish ee.Polygon from the outer ring of the first drawn feature to collect imagery
aoi = ee.Geometry.Polygon(draw_features.getInfo()['geometry']['coordinates'][0])

# Get Imagery

In [None]:
## Search window
start_date = '2017-03-28'
end_date = '2024-12-31'

## Exclusive upper limit on CLOUDY_PIXEL_PERCENTAGE for the Sentinel-2 images;
## a smaller number means you see more of Earth's surface
s2_cloud_cov = 100

## Orbit for imagery (handed to get_sentinel_imagery)
orbit = 'ASCENDING'

## Collections gathered for the AOI, keyed by product
total_colls = {
    's2_10m': get_sentinel_imagery(aoi, start_date, end_date, s2_cloud_cov, orbit),
}

# Cloud masking for the individual marsh creation sites
- First a global operator to remove all images from the time series with cloud coverage percentage greater than 20 percent (can increase or decrease)
- Use s2cloudless 10m product from ESA to mask out remaining cloud pixels for area change time series (https://developers.google.com/earth-engine/tutorials/community/sentinel-2-s2cloudless)
- Alternatively, use Cloud Score+ (https://medium.com/google-earth/all-clear-with-cloud-score-bd6ee2e2235e); this is what the masking functions defined above use

In [None]:
# Serialise the geometries of filtered_gdf to a GeoJSON dictionary
geojson_dict = json.loads(filtered_gdf.geometry.to_json())

# Build an ee.Geometry from the geometry of the first feature only
first_site_geometry = geojson_dict['features'][0]['geometry']
poly = ee.Geometry(first_site_geometry)

In [None]:
# Stricter limit on the images' CLOUDY_PIXEL_PERCENTAGE property for the site time series;
# remaining cloudy pixels are masked afterwards and only the leftover pixels are used
cloud_refine = 10
low_cloud = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_refine)

# filter() is a collection method, so it is applied to the collection rather than
# to each image inside map()
rgb = total_colls['s2_10m'].filter(low_cloud)

# Same low-cloud images, clipped to the marsh creation site polygon
clipped_rgb = rgb.map(lambda img: img.clip(poly))

# One band collection per site. Spectral indices are derived after masking:
# mask_s2_collections selects B2/B3/B4/B8/B11, which an index-only collection does not have
site_colls = [clipped_rgb]

In [None]:
# keeps pixels whose Cloud Score+ 'cs_cdf' value is 0.60 or greater and masks the rest
# (Cloud Score+ values are higher for clearer pixels, so low scores are the cloudy ones)
masked_colls = mask_s2_collections(site_colls, qa_band='cs_cdf', clear_threshold=0.60)

In [None]:
# filters the masked_collections to only contain images where 60% of pixels or more are not masked
# i.e. images that had more than 40% of their pixels removed when masking are dropped
filtered_collections = filter_masked_collections(masked_colls, clear_threshold=0.60, scale=10)

# show each masked collection next to its filtered version, helps see number of images dropped after masking
for masked, filtered in zip(masked_colls, filtered_collections):
    display(masked)
    display(filtered)

# Pair the filtered band collection of the site with the spectral indices computed from it.
# Later cells look sites up in masked_pairs by name; rename the key to something meaningful.
site_name = 'site_0'
masked_pairs = {
    site_name: [filtered_collections[0], get_spectral_indices(filtered_collections[0])],
}

In [None]:
# The keys of masked_pairs are the site names;
# use them to pick the site whose time series you want to examine
for site in masked_pairs:
    print(site)

In [None]:
Map = geemap.Map()
Map.centerObject(aoi, 12)

# Preview roughly one twentieth of the images of the first filtered collection
preview_coll = filtered_collections[0]
s2_images = preview_coll.toList(preview_coll.size())
preview_count = preview_coll.size().getInfo() // 20

for i in range(preview_count):
    # NOTE(review): -i is 0 on the first pass (index 0) and negative afterwards,
    # presumably counting back from the end of the list — confirm this mix is intended
    image = ee.Image(s2_images.get(-i))
    add_rgb_to_map(image, Map)

# Layer toggle in the corner, then display the map
Map.addLayerControl(position = 'topright')
Map

# Extract time series of Land Area Change (km²) for the marsh creation sites defined above

In [None]:
# plots time series of Land Classified Area (km2) over the time period specified
# can, and will use this time series to examine seasonal trends
# will try to include some storm data from Station CAPL1 - 8768094 - Calcasieu Pass, LA NOAA Buoy
# will produce Maps of NDVI, MSAVI2, and BSI to show the evolution of vegetation cover over time for creation sites

# Site names are the keys of masked_pairs (the same names printed in the cell above)
for name in masked_pairs:
    plot_subaerial_change(masked_pairs, name, interpolate=False)