# Try fixing image ID errors when querying from GEE

### 0. Setup

#### Define paths in directory and desired settings. 
Modify only the lines located between the following two markers:

`#### MODIFY HERE ####`  

`#####################`

In [None]:
##### MODIFY HERE #####

# -----Directory layout
# Study site identifier; it is also embedded in the AOI folder and file names below
site_name = 'RGI60-01.00038'
# Location of snow-cover-mapping/ - keep the trailing "/"
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# Folder that holds the AOI files
AOI_path = f'/Users/raineyaberle/Google Drive/My Drive/Research/PhD/snow_cover_mapping/study-sites/{site_name}/AOIs/'
# Name of the AOI file
AOI_fn = f'{site_name}_outline.shp'
# Folder that holds the DEM raster file
DEM_path = f'{AOI_path}../DEMs/'
# Name of the DEM file
# Note: leave DEM_fn=None to use the ArcticDEM or ASTER GDEM via Google Earth Engine
DEM_fn = None
# Folder where output images are written
out_path = f'{AOI_path}../imagery/'
# Folder with the PlanetScope images
# Note: use PS_im_path=None when PlanetScope is not used
PS_im_path = f'{out_path}PlanetScope/raw_images/'
# Folder where output figures are written
figures_out_path = f'{AOI_path}../figures/'

# -----Image search filters
# overall date window for the search
date_start, date_end = '2013-05-01', '2022-12-01'
# first and last calendar month to include
month_start, month_end = 5, 10
# upper limit on cloud cover
cloud_cover_max = 70

# -----Cloud masking with the respective cloud masking data products
# NOTE: Cloud mask products anecdotally are less accurate over glacierized/snow-covered surfaces.
# If the cloud masks consistently mask large regions of your study site, consider mask_clouds = False
mask_clouds = True

# -----Image download, clipping & plotting settings
# True = download all satellite images by default.
# Note: even with im_download = False, images over the AOI that exceed the GEE limit
# must be downloaded regardless.
im_download = False
# True = plot figures of results for each image where applicable
plot_results = True
# True = skip images where bands appear "clipped", i.e. max(blue) < 0.8
skip_clipped = False
# True = crop images to AOI before calculating SCA
crop_to_AOI = True
# True = save SCAs and snowlines to file
save_outputs = True
# True = save output figures to file
save_figures = True

#######################

# -----Import packages
import xarray as xr
import os
import numpy as np
import glob
from matplotlib import pyplot as plt, dates
import matplotlib
import rasterio as rio
import geopandas as gpd
import pandas as pd
import sys
import ee
import geedim as gd
import json
from tqdm.auto import tqdm
from joblib import dump, load
from shapely.geometry import MultiPolygon, Polygon

# -----Output locations, all nested under out_path
# one folder per imagery source
S2_TOA_im_path = f'{out_path}Sentinel-2_TOA/'
S2_SR_im_path = f'{out_path}Sentinel-2_SR/'
L_im_path = f'{out_path}Landsat/'
# PlanetScope intermediate products
PS_im_masked_path = f'{out_path}PlanetScope/masked/'
PS_im_mosaics_path = f'{out_path}PlanetScope/mosaics/'
# classified images and delineated snowlines
im_classified_path = f'{out_path}classified/'
snowlines_path = f'{out_path}snowlines/'

# -----Add path to functions
# Put the functions/ folder under base_path on the module search path (at position 1)
# so that the pipeline_utils module can be imported on the next line.
sys.path.insert(1, base_path+'functions/')
# The helpers in pipeline_utils are used in the rest of the notebook through the alias `f`.
import pipeline_utils as f

# -----Load dataset dictionary
# Read the JSON file of dataset characteristics; the context manager closes the
# file handle as soon as parsing is done instead of leaving it open.
with open(base_path + 'inputs-outputs/datasets_characteristics.json') as dataset_dict_file:
    dataset_dict = json.load(dataset_dict_file)

#### Authenticate and initialize Google Earth Engine (GEE). 

__Note:__ The first time you run the following cell, you will be asked to authenticate your GEE account for use in this notebook. This will send you to an external web page, where you will walk through the GEE authentication workflow and copy an authentication code back into the space below this cell when prompted. 

In [None]:
# Endpoint passed to every ee.Initialize call in this cell
gee_url = 'https://earthengine-highvolume.googleapis.com'
try:
    ee.Initialize(opt_url=gee_url)
except Exception:
    # Initialization failed (presumably because no valid credentials are stored yet):
    # run the interactive authentication workflow, then retry once.
    # `Exception` rather than a bare `except` so Ctrl-C / SystemExit still interrupt the cell.
    ee.Authenticate()
    ee.Initialize(opt_url=gee_url)

#### Load AOI and DEM

In [None]:
# -----Load AOI as gpd.GeoDataFrame
AOI = gpd.read_file(AOI_path + AOI_fn)
# reproject the AOI to WGS to solve for the optimal UTM zone
AOI_WGS = AOI.to_crs('EPSG:4326')
# [x, y] of the centroid of the first AOI geometry
AOI_WGS_centroid = [AOI_WGS.geometry[0].centroid.xy[0][0],
                    AOI_WGS.geometry[0].centroid.xy[1][0]]
# grab the optimal UTM zone EPSG code
epsg_UTM = f.convert_wgs_to_utm(AOI_WGS_centroid[0], AOI_WGS_centroid[1])
print('Optimal UTM CRS = EPSG:' + str(epsg_UTM))
# reproject AOI to the optimal UTM zone
# str() keeps this working whether the helper returns the code as a string or an int,
# matching how the code is formatted everywhere else in this notebook
AOI_UTM = AOI.to_crs('EPSG:' + str(epsg_UTM))

# -----Build the DEM as an Xarray DataSet
utm_crs = 'EPSG:'+str(epsg_UTM)
if DEM_fn is not None:
    # a DEM file name was given: open it and call its data variable "elevation"
    DEM = xr.open_dataset(DEM_path + DEM_fn).rename({'band_data': 'elevation'})
    # bring the DEM into the optimal UTM zone and record that CRS on it
    DEM = DEM.rio.reproject(utm_crs).rio.write_crs(utm_crs)
else:
    # no DEM file name: obtain the DEM by querying GEE
    DEM = f.query_gee_for_dem(AOI_UTM, base_path, site_name, DEM_path)
# when elevation has more than two dimensions (possible extra bands from ArcticDEM
# or other DEM), keep only the first entry along the leading dimension
if np.ndim(DEM.elevation.data) > 2:
    DEM['elevation'] = DEM.elevation[0]
    # values below -100 are replaced with NaN
    DEM = xr.where(DEM < -100, np.nan, DEM)
    DEM = DEM.rio.write_crs(utm_crs)

# -----Plot
# Show the DEM with the AOI outline on top; axes are in kilometres (coordinates / 1e3).
fig, ax = plt.subplots(1, 1, figsize=(6,6))
dem_im = ax.imshow(DEM.elevation.data, cmap='terrain', 
          extent=(np.min(DEM.x.data)/1e3, np.max(DEM.x.data)/1e3, np.min(DEM.y.data)/1e3, np.max(DEM.y.data)/1e3))
# Collect the polygon(s) making up the first AOI geometry so both geometry types
# share one plotting loop; any other geometry type draws no outline (as before).
aoi_geom = AOI_UTM.geometry[0]
if isinstance(aoi_geom, Polygon):
    aoi_outlines = [aoi_geom]
elif isinstance(aoi_geom, MultiPolygon):
    aoi_outlines = list(aoi_geom.geoms)
else:
    aoi_outlines = []
for outline in aoi_outlines:
    outline_x, outline_y = outline.exterior.coords.xy
    ax.plot([x/1e3 for x in outline_x],
            [y/1e3 for y in outline_y], '-k')
ax.grid()
ax.set_xlabel('Easting [km]')
ax.set_ylabel('Northing [km]')
fig.colorbar(dem_im, ax=ax, shrink=0.5, label='Elevation [m]')
plt.show()

## 2. Sentinel-2 SR imagery

In [None]:
# Disabled override of the search window set in the setup cell.
# Presumably used to narrow the query to a short period while debugging;
# uncomment both lines to apply it.
# date_start = '2022-05-15'
# date_end = '2022-06-15'

In [None]:
# -----Query GEE for imagery and download to S2_SR_im_path if necessary
dataset = 'Sentinel-2_SR'
# The helper lives in pipeline_utils, which this notebook imports as `f`; calling it
# without the alias raises NameError because the bare name is never defined here.
im_col_gd = f.query_gee_for_imagery(dataset_dict, dataset, AOI_UTM, date_start, date_end, month_start, 
                                    month_end, cloud_cover_max, mask_clouds, S2_SR_im_path, im_download)

In [None]:
# Try to read the collection properties; if that fails, report which image ID
# (if any) is named in the error message.
# Defaults let later cells tell whether an image ID was recovered from the error.
exc_id = None
exc_date = None
try:
    properties = im_col_gd.properties
except Exception as e:
    print(e)
    message = str(e)
    # The image ID is taken from the text between "ID=" and the next ")".
    # Errors without that marker used to raise IndexError here, hiding the real failure.
    if 'ID=' in message:
        exc_id = message.split('ID=')[1].split(')')[0]
        # assumes the ID begins with the date as YYYYMMDD — TODO confirm
        exc_date = exc_id[0:4] + '-' + exc_id[4:6] + '-' + exc_id[6:8]
        print('Error accessing image ID: ' + exc_id)
        print('Error image date: ' + exc_date) 
        # NOTE(review): nothing is actually removed from the collection in this cell.
        print('Removing from collection...')
    else:
        print('No image ID found in the error message; nothing to remove.')

In [None]:
# Bounding box of the AOI (WGS coordinates) as a closed ring for querying geedim
aoi_bounds = AOI_WGS.geometry.bounds
xmin, ymin = aoi_bounds.minx[0], aoi_bounds.miny[0]
xmax, ymax = aoi_bounds.maxx[0], aoi_bounds.maxy[0]
# corners in the order (xmin, ymin) -> (xmax, ymin) -> (xmax, ymax) -> (xmin, ymax),
# then back to the first corner to close the ring
bbox_ring = [[xmin, ymin],
             [xmax, ymin],
             [xmax, ymax],
             [xmin, ymax],
             [xmin, ymin]]
region = ee.Geometry.Polygon([bbox_ring])

In [None]:
# Build the Sentinel-2 SR collection with the same filters as the settings cell
# and list the IDs of the images it contains.
im_col = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_cover_max))
          .filterDate(ee.Date(date_start), ee.Date(date_end))
          # use the configured month range rather than hard-coded 5 and 10,
          # so this query follows the user settings like the other filters do
          .filter(ee.Filter.calendarRange(month_start, month_end, 'month'))
          .filterBounds(region))
# getInfo() fetches the list of IDs from the server into a Python list
im_ids = im_col.aggregate_array('system:id').getInfo()

# bare expression so the notebook displays the IDs
im_ids


In [None]:
# -----Load trained classifier and feature columns
clf_fn = base_path+'inputs-outputs/Sentinel-2_SR_classifier_all_sites.joblib'
# NOTE: joblib files are pickle-based; only load classifier files from a trusted source
clf = load(clf_fn)
feature_cols_fn = base_path+'inputs-outputs/Sentinel-2_SR_feature_columns.json'
# the context manager closes the JSON file handle once it has been parsed
with open(feature_cols_fn) as feature_cols_file:
    feature_cols = json.load(feature_cols_file)

# -----Loop through images
# For every image: rescale / mask no-data, add an NDSI band, classify it (or reuse the
# classified file already on disk), then delineate the snowline unless one already exists.
# NOTE(review): `im_list` is not assigned anywhere in the visible notebook (the query
# cell stores its result in `im_col_gd`) — confirm it is defined before running this cell.
if isinstance(im_list, str): # check that images were found
    print('No images found to classify, quiting...')
else:
    # characteristics of the current dataset, looked up once instead of per use
    ds_info = dataset_dict[dataset]
    
    for im_xr in tqdm(im_list):
        
        # -----Image date, taken from the first 19 characters of the first time stamp
        im_date = str(im_xr.time.data[0])[0:19]
        print(im_date)
        
        # -----Adjust image for image scalar and no data values
        # replace no data values with NaN and account for image scalar
        # (the CRS is saved first and written back after the xr.where / band operations)
        crs = im_xr.rio.crs.to_epsg()
        if np.nanmean(im_xr['B2'])>1e3:
            # mean of B2 above 1e3: divide by the image scalar
            im_xr = xr.where(im_xr==ds_info['no_data_value'], np.nan, 
                             im_xr / ds_info['image_scalar'])
        else:
            im_xr = xr.where(im_xr==ds_info['no_data_value'], np.nan, im_xr)
        # add NDSI band: normalized difference of the two bands listed under 'NDSI_bands'
        ndsi_band_1 = im_xr[ds_info['NDSI_bands'][0]]
        ndsi_band_2 = im_xr[ds_info['NDSI_bands'][1]]
        im_xr['NDSI'] = (ndsi_band_1 - ndsi_band_2) / (ndsi_band_1 + ndsi_band_2)
        im_xr.rio.write_crs('EPSG:'+str(crs), inplace=True)
                
        # -----Classify image
        # file names share the prefix <date without "-" and ":">_<site>_<dataset>
        fn_prefix = im_date.replace('-','').replace(':','') + '_' + site_name + '_' + dataset
        # check if classified image already exists in file
        im_classified_fn = fn_prefix + '_classified.nc'
        if os.path.exists(im_classified_path + im_classified_fn):
            print('Classified image already exists in file, continuing...')
            im_classified = xr.open_dataset(im_classified_path + im_classified_fn)
            # remove no data values
            im_classified = xr.where(im_classified==-9999, np.nan, im_classified)
        else:  
            # classify image
            im_classified = f.classify_image(im_xr, clf, feature_cols, crop_to_AOI, AOI_UTM, DEM,
                                             dataset_dict, dataset, im_classified_fn, im_classified_path)
            if isinstance(im_classified, str): # skip if error in classification
                continue
        
        # -----Delineate snowline(s)
        # check if snowline already exists in file
        snowline_fn = fn_prefix + '_snowline.csv'
        if os.path.exists(snowlines_path + snowline_fn):
            print('Snowline already exists in file, continuing...')
            continue # no need to load snowline if it already exists

        # NOTE(review): this overrides the plot_results value chosen in the settings
        # cell — confirm that forcing it to True here is intended.
        plot_results = True
        # create directory for figures if it doesn't already exist
        # (makedirs also creates missing parent folders, where mkdir would raise)
        if plot_results and not os.path.exists(figures_out_path):
            os.makedirs(figures_out_path, exist_ok=True)
            print('Created directory for output figures: '+figures_out_path)
        snowline_df = f.delineate_image_snowline(im_xr, im_classified, site_name, AOI_UTM, dataset_dict, dataset, 
                                                 im_date, snowline_fn, snowlines_path, figures_out_path, plot_results)
        # plt.show()
        print('Accumulation Area Ratio =  ' + str(snowline_df['AAR'][0]))
        print(' ')
