# Try the cloud-masking approach from Moussavi et al. (2020) for querying imagery

In [1]:
import ee
import geopandas as gpd
import os
import numpy as np
import wxee
import geedim
from tqdm.auto import tqdm
import rioxarray as rxr
import xarray as xr
import math
import geedim as gd

In [2]:
# Initialize the Earth Engine Python client for this session
ee.Initialize()

In [3]:
# -----Site and path settings (testing with a single site)
site_name = 'RGI60-01.00032'
# path to the snow-cover-mapping/ code repository
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# path to folder containing AOI file
aoi_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites_NEW-CLOUD-MASK/' + site_name + '/AOIs/'
# AOI file name
aoi_fn = site_name + '_outline.shp' 
# path to folder containing DEM raster file
dem_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites_NEW-CLOUD-MASK/' + site_name + '/DEMs/'
# DEM file name
# Note: set dem_fn=None if you want to use the ArcticDEM or NASADEM via Google Earth Engine
dem_fn = None
# path for output images (sibling folder of the AOI folder)
out_path = aoi_path + '../imagery/'
# path to PlanetScope images
# Note: set ps_im_path=None if not using PlanetScope
ps_im_path = out_path + 'PlanetScope/raw_images/'
# path for output figures (sibling folder of the AOI folder)
figures_out_path = aoi_path + '../figures/'

# -----Determine whether to mask clouds using the respective cloud masking data products
# NOTE: Cloud mask products are anecdotally less accurate over glacierized/snow-covered surfaces.
# If the cloud masks are consistently masking large regions of your study site, I suggest setting mask_clouds = False
mask_clouds = True
cloud_cover_max = 70 # maximum cloud cover percentage according to the image metadata
aoi_coverage = 70 # minimum percentage of the AOI that must remain covered after any cloud filtering

# -----Define image search filters
# date range for the image search (YYYY-MM-DD)
date_start = '2013-05-01'
date_end = '2023-11-01'
# first and last month of the year used in the search
month_start = 5
month_end = 11

# -----Determine image download, clipping & plotting settings
# Note: if im_download = False, but images over the AOI exceed the GEE limit, images must be downloaded regardless.
im_download = False  # = True to download all satellite images by default
plot_results = True # = True to plot figures of results for each image where applicable
save_outputs = True # = True to save SCAs and snowlines to file
verbose = True # = True to print details for each image during each step
skip_clipped = False # = True to skip PlanetScope images where bands appear "clipped", i.e. max(blue) < 0.8

# -----Set paths for output files (one sub-folder of out_path per product)
s2_toa_im_path = os.path.join(out_path, 'Sentinel-2_TOA')
s2_sr_im_path = os.path.join(out_path, 'Sentinel-2_SR')
l_im_path = os.path.join(out_path, 'Landsat')
im_classified_path = os.path.join(out_path, 'classified')
snowlines_path = os.path.join(out_path, 'snowlines')

In [4]:
def convert_wgs_to_utm(lon: float, lat: float):
    """
    Determine the EPSG code of the UTM zone that contains a WGS84 coordinate pair.

    Parameters
    ----------
    lon: float
        longitude coordinate
    lat: float
        latitude coordinate

    Returns
    ----------
    epsg_code: str
        EPSG code of the UTM zone as a string, e.g. "32606":
        a "326" (lat >= 0) or "327" (lat < 0) prefix followed by the two-digit zone number
    """
    # Zone number: 6-degree-wide strips counted from lon = -180, wrapped into 1-60
    zone_number = math.floor((lon + 180) / 6) % 60 + 1
    # The prefix depends only on the sign of the latitude
    hemisphere_prefix = '326' if lat >= 0 else '327'
    # Zero-pad single-digit zones so the code always has five characters
    return hemisphere_prefix + str(zone_number).zfill(2)

def calculate_percent_image_aoi_coverage(ee_image, ee_aoi):
    """
    Compute the percentage of the AOI covered by unmasked image pixels and
    attach it to the image as the property "percent_AOI_coverage".

    Parameters
    ----------
    ee_image: ee.Image
        image whose mask is evaluated
    ee_aoi: ee.Geometry
        area of interest over which the unmasked area is summed

    Returns
    ----------
    the result of ee_image.set('percent_AOI_coverage', ...).copyProperties(ee_image);
    the value is a server-side number, so call .get('percent_AOI_coverage').getInfo() to read it
    """
    # Reduce the per-band mask with allNonZero, then mask out the zeros so only
    # pixels that passed the reducer contribute to the sum below
    valid_pixels = ee_image.mask().reduce(ee.Reducer.allNonZero()).selfMask()

    # Weight each remaining pixel by its area and sum over the AOI at the image's nominal scale
    valid_area_image = valid_pixels.multiply(ee.Image.pixelArea())
    area_sums = valid_area_image.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=ee_aoi,
        scale=ee_image.projection().nominalScale(),
        maxPixels=1e13
    )
    # 'all' is the band name read back from the reduced image
    valid_area = ee.Number(area_sums.get('all'))

    # Express the unmasked area as a percentage of the total AOI area
    percent_covered = valid_area.divide(ee_aoi.area()).multiply(100)

    tagged_image = ee_image.set('percent_AOI_coverage', percent_covered)
    return tagged_image.copyProperties(ee_image)

def adjust_dem_data_vars(dem):
    """
    Standardize a DEM dataset so its data live in a single variable named
    "elevation" with dimensions (y, x).

    Parameters
    ----------
    dem: xarray.Dataset
        digital elevation model (DEM)

    Returns
    -------
    dem: xarray.Dataset
        digital elevation model (DEM) with one band: "elevation"
    """
    # Rename the data variable if it is still called "band_data"
    if 'band_data' in dem.data_vars:
        dem = dem.rename({'band_data': 'elevation'})

    # Nothing more to do when there is no "band" dimension
    if 'band' not in dem.dims:
        return dem

    # Keep only the first band, drop the "band" dimension (and every variable
    # that uses it), then re-attach the elevations as a 2-D (y, x) variable
    first_band = dem.elevation.data[0]
    dem = dem.drop_dims('band')
    dem['elevation'] = (('y', 'x'), first_band)
    return dem
    
def query_gee_for_dem(aoi_utm, base_path, site_name, out_path=None):
    """
    Query GEE for the ArcticDEM Mosaic (where there is coverage) or the NASADEM,
    clip to the AOI, and return as xarray.Dataset.

    If a clipped DEM for the site already exists in out_path, it is loaded instead of
    querying GEE (checked in order: ArcticDEM referenced to the geoid, ArcticDEM, NASADEM).
    ArcticDEM elevations are adjusted with the EGM96 geoid model file
    ('inputs-outputs/us_nga_egm96_15.tif' under base_path) and re-saved.

    Parameters
    ----------
    aoi_utm: geopandas.geodataframe.GeoDataFrame
        area of interest used for clipping the DEM, reprojected to the optimal UTM zone.
        Only the exterior ring of the first geometry is used.
    base_path: str
        path to 'snow-cover-mapping/' used to load ArcticDEM_Mosaic_coverage.shp
        and the EGM96 geoid model
    site_name: str
        name of site used for saving output files
    out_path: str
        path to the folder where the DEM is looked for and saved. Although the default is
        None (kept for backward compatibility of the signature), a path must be provided.

    Returns
    ----------
    dem_ds: xarray.Dataset
        dataset of elevations over the AOI, reprojected to the CRS of aoi_utm.
        None is returned if neither DEM covers enough of the AOI.

    Raises
    ----------
    TypeError
        if out_path is None
    """

    # -----Fail early with a clear message instead of an opaque error from os.path.join
    if out_path is None:
        raise TypeError('out_path must be a path to a folder (str), not None: it is used to look for '
                        'previously downloaded DEMs and to save newly downloaded ones.')

    # minimum percentage of the AOI that a DEM must cover to be used
    min_percent_coverage = 70

    # -----Grab optimal UTM zone from AOI CRS
    epsg_utm = str(aoi_utm.crs.to_epsg())

    # -----Define function to transform ellipsoid to geoid heights
    def ellipsoid_to_geoid_heights(ds, base_path, out_path, out_fn):
        print('Transforming elevations from the ellipsoid to the geoid...')

        # Load EGM96 model from the local inputs-outputs folder;
        # the context manager closes the file once the heights are in memory
        geoid_model_fn = os.path.join(base_path, 'inputs-outputs', 'us_nga_egm96_15.tif')
        with xr.open_dataset(geoid_model_fn) as geoid_model:
            # Resample geoid model to DEM coordinates
            geoid_model_resampled = geoid_model.interp(x=ds.x, y=ds.y, method='linear')
            geoid_height = geoid_model_resampled.band_data.data[0]

        # Subtract geoid heights from ds heights and update the dataset
        ds['elevation'] -= geoid_height

        # Re-save to file with updated elevations
        ds.rio.to_raster(os.path.join(out_path, out_fn), dtype='float32', zlib=True, compress='deflate')
        print('DEM re-saved with elevations referenced to the EGM96 geoid.')

        return ds

    # -----Define output image names, check if already exists in directory
    arcticdem_fn = site_name + '_ArcticDEM_clip.tif'
    arcticdem_geoid_fn = site_name + '_ArcticDEM_clip_geoid.tif'
    nasadem_fn = site_name + '_NASADEM_clip.tif'
    if os.path.exists(os.path.join(out_path, arcticdem_geoid_fn)):
        print('Clipped ArcticDEM referenced to the geoid already exists in directory, loading...')
        dem_ds = xr.open_dataset(os.path.join(out_path, arcticdem_geoid_fn))
        dem_ds = adjust_dem_data_vars(dem_ds)
    elif os.path.exists(os.path.join(out_path, arcticdem_fn)):
        print('Clipped ArcticDEM already exists in directory, loading...')
        dem_ds = xr.open_dataset(os.path.join(out_path, arcticdem_fn))
        dem_ds = adjust_dem_data_vars(dem_ds)
        # transform elevations from ellipsoid to geoid, save to file
        dem_ds = ellipsoid_to_geoid_heights(dem_ds, base_path, out_path, arcticdem_geoid_fn)
    elif os.path.exists(os.path.join(out_path, nasadem_fn)):
        print('Clipped NASADEM already exists in directory, loading...')
        dem_ds = xr.open_dataset(os.path.join(out_path, nasadem_fn))
        dem_ds = adjust_dem_data_vars(dem_ds)
    else:  # if no DEM exists in directory, load from GEE

        # -----Reformat AOI for clipping DEM
        # Derive the lon/lat outline from the aoi_utm argument rather than relying on a
        # notebook-level variable, so the function works wherever it is called from
        aoi_wgs = aoi_utm.to_crs('EPSG:4326')
        lons, lats = aoi_wgs.geometry[0].exterior.coords.xy
        aoi_ee = ee.Geometry.Polygon(list(zip(lons, lats)))

        # -----Check for ArcticDEM coverage over AOI
        # load ArcticDEM_Mosaic_coverage.shp
        arcticdem_coverage_fn = 'ArcticDEM_Mosaic_coverage.shp'
        arcticdem_coverage = gpd.read_file(os.path.join(base_path, 'inputs-outputs', arcticdem_coverage_fn))
        # reproject to optimal UTM zone
        arcticdem_coverage_utm = arcticdem_coverage.to_crs(f'EPSG:{epsg_utm}')
        # check for intersection with AOI
        intersects = arcticdem_coverage_utm.geometry[0].intersects(aoi_utm.geometry[0])
        # check for actual coverage of ArcticDEM (some sites have nearly empty DEM coverage even within data boundaries)
        coverage = False
        if intersects:
            dem = ee.Image('UMN/PGC/ArcticDEM/V3/2m_mosaic').clip(aoi_ee).select('elevation')
            percent_coverage = calculate_percent_image_aoi_coverage(dem, aoi_ee).get('percent_AOI_coverage').getInfo()
            if percent_coverage > min_percent_coverage:
                coverage = True
        # use ArcticDEM only if it intersects the AOI AND has sufficient coverage
        # (coverage can only be True here when intersects is True)
        use_arcticdem = coverage
        if use_arcticdem:
            print('ArcticDEM coverage over AOI')
            dem = ee.Image('UMN/PGC/ArcticDEM/V3/2m_mosaic').select('elevation').clip(aoi_ee)
            dem_fn = arcticdem_fn  # file name for saving
            scale = 10  # spatial resolution [m]
        else:
            print('No ArcticDEM coverage, using NASADEM')
            dem = ee.Image("NASA/NASADEM_HGT/001").select('elevation').clip(aoi_ee)
            dem_fn = nasadem_fn  # file name for saving
            scale = 30  # spatial resolution [m]
            # Check for NASADEM coverage
            percent_coverage = calculate_percent_image_aoi_coverage(dem, aoi_ee).get('percent_AOI_coverage').getInfo()
            if percent_coverage > min_percent_coverage:
                coverage = True

        # -----Check if either DEM had coverage over AOI
        if not coverage:
            print(f'Neither ArcticDEM nor NASADEM have at least {min_percent_coverage}% coverage over the AOI. '
                  'Please acquire a different DEM.')
            return None

        # -----Download DEM and open as xarray.Dataset
        print('Downloading DEM to ', out_path)
        # create out_path (including any missing parent folders) if it doesn't exist
        os.makedirs(out_path, exist_ok=True)
        # convert DEM to geedim MaskedImage
        dem_gd = gd.MaskedImage(dem, mask=False, region=aoi_ee)
        # download DEM
        dem_gd.download(os.path.join(out_path, dem_fn), region=aoi_ee, scale=scale, crs="EPSG:4326")
        # read DEM as xarray.Dataset
        dem_ds = xr.open_dataset(os.path.join(out_path, dem_fn))
        dem_ds = adjust_dem_data_vars(dem_ds)

        # -----If using ArcticDEM, transform elevations with respect to the geoid (rather than the ellipsoid)
        if use_arcticdem:
            dem_ds = ellipsoid_to_geoid_heights(dem_ds, base_path, out_path, arcticdem_geoid_fn)

    # -----Reproject DEM to UTM
    dem_ds = dem_ds.rio.reproject(f'EPSG:{epsg_utm}')
    # replace no-data values (very large or <= -9999) with NaN
    dem_ds = xr.where((dem_ds > 1e38) | (dem_ds <= -9999), np.nan, dem_ds)
    # re-write the CRS after the xr.where step
    dem_ds = dem_ds.rio.write_crs(f'EPSG:{epsg_utm}')

    return dem_ds


In [5]:
# Read the AOI outline and express it in geographic coordinates (EPSG:4326)
aoi = gpd.read_file(os.path.join(aoi_path, aoi_fn)).to_crs('EPSG:4326')

# Pick the UTM zone from the centroid of the first AOI geometry, then reproject the AOI to it
epsg_utm = convert_wgs_to_utm(aoi.geometry[0].centroid.x,
                              aoi.geometry[0].centroid.y)
aoi_utm = aoi.to_crs(f'EPSG:{epsg_utm}')

# Load the DEM from dem_path if one is already there, otherwise fetch it from GEE
dem_ds = query_gee_for_dem(aoi_utm, base_path, site_name, dem_path)

Clipped ArcticDEM referenced to the geoid already exists in directory, loading...


In [21]:
# Scratch check: build a numpy datetime64 from a date string with nanosecond ('ns') units
np.datetime64('2013-01-01', 'ns')

numpy.datetime64('2013-01-01T00:00:00.000000000')

In [22]:
# Landsat
im_xr_list = query_gee_for_imagery(aoi_utm, 
                                  dataset='Landsat', 
                                  start_date='2023-05-01', 
                                  end_date='2023-05-14', 
                                  start_month=5, 
                                  end_month=11, 
                                  percent_aoi_coverage=70, 
                                  im_download=True, 
                                  out_path=l_im_path, 
                                  run_pipeline=False)

  0%|          | 0/3 [00:00<?, ?it/s]

In [23]:
# Display the first item returned by the Landsat query
im_xr_list[0]