# Load and save ERA5-Land Daily Aggregated band values at point coordinates


## Requirements: 

- __Google Earth Engine account__. Sign up [here](https://earthengine.google.com/signup/).
- __GIS file__ of the Area of Interest (AOI) boundaries (.shp, .gpkg, or other file readable by geopandas). 
- __Digital Elevation Model__ (DEM) over the AOI referenced to the ellipsoid (TIF, netCDF, or other file readable by xarray). 
- __ERA5-Land gridded geopotential__ file, used to calculate surface heights (TIF, netCDF, or other file readable by xarray). Options for access:
    - This code repository: "geo_1279l4_0.1x0.1.grib2_v4_unpack.nc" in the [`inputs-outputs` folder](https://github.com/RaineyAbe/snow-cover-mapping-application/tree/main/inputs-outputs). 
    - Download from the [ECMWF documentation for ERA5-Land](https://confluence.ecmwf.int/display/CKB/ERA5-Land%3A+data+documentation#ERA5Land:datadocumentation-LandSurfaceModel) (see Table 1).

In [None]:
import ee
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
from tqdm.auto import tqdm
import xarray as xr
import rioxarray as rxr
from shapely.geometry import Polygon, LineString
import geojson

## Authenticate and initialize Google Earth Engine

In [None]:
# Try to initialize with whatever credentials are already available; if that
# fails, run the authentication flow once and initialize again.
# `except Exception` (rather than a bare `except`) lets KeyboardInterrupt and
# SystemExit propagate instead of triggering an unwanted authentication prompt.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

## Define paths in directory

In [None]:
# User-specific settings: edit every path below before running the notebook.

# Name of the study site; it is embedded in the names of the output CSV files
site_name = 'Hubbard'

# Full path to the glacier boundaries (AOI) file, read with geopandas.
# Only the exterior ring of the first geometry is used to filter the GEE query.
aoi_fn = '/Users/raineyaberle/Research/Hubbard/RGI/Hubbard_boundaries.shp'

# Full path to the sample points GIS file, read with geopandas.
# The vertices of the first geometry are used as the sample points.
line_fn = '/Users/raineyaberle/Research/Hubbard/velocity/center.gpkg'

# Full path to the DEM, opened with rioxarray
dem_fn = '/Users/raineyaberle/Research/Hubbard/DEMs/ifsar_hubbardDEM.tif'

# Full path to the ERA5-Land geopotential file, opened with xarray
era_geo_fn =  '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping-application/inputs-outputs/geo_1279l4_0.1x0.1.grib2_v4_unpack.nc'

# Folder where the output CSV files will be saved
out_path = '/Users/raineyaberle/Research/Hubbard/weather/'

## Define filters, etc. for querying GEE for ERA5-Land

In [None]:
# -----Date range
# Passed to the image collection's filterDate() and embedded in output file names.
# NOTE(review): GEE documents the filterDate end as exclusive - confirm that
# excluding end_date itself is intended.
start_date = '2016-11-01'
end_date = '2023-12-01'

# -----Bands to extract
# One CSV file is written per band.
# See all data bands in the GEE documentation here: 
# https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_LAND_DAILY_AGGR#bands
bands = ['temperature_2m', 
         'total_precipitation_sum', 
         'snowfall_sum', 
         'snowmelt_sum'] 

# -----Lapse rate used to adjust air temperatures for elevation
# The adjustment subtracts lapse_rate * (DEM elevation - ERA5-Land elevation) / 1e3,
# so elevations are expected in meters.
lapse_rate = 6 # deg C / km

## Load glacier boundaries, sample points, and DEM

In [None]:
# -----Glacier boundaries (AOI): read, convert to WGS84, and build the GEE polygon
# from the exterior ring of the first geometry
aoi = gpd.read_file(aoi_fn)
aoi_wgs = aoi.to_crs('EPSG:4326')
aoi_lons, aoi_lats = aoi_wgs.geometry[0].exterior.coords.xy
aoi_wgs_ee = ee.Geometry.Polygon(list(zip(aoi_lons, aoi_lats)))

# -----Sample points: read and convert to WGS84
line = gpd.read_file(line_fn)
line_wgs = line.to_crs('EPSG:4326')

# -----DEM: open and reproject to WGS84
dem = rxr.open_rasterio(dem_fn).rio.reproject('EPSG:4326')
# mask no-data values (very large positive fill values and values <= -9999)
dem_no_data = (dem > 1e38) | (dem <= -9999)
# xr.where returns a new array, so the CRS is written again afterwards
dem = xr.where(dem_no_data, np.nan, dem).rio.write_crs('EPSG:4326')
# keep only the cells inside the glacier boundaries
dem_clip = dem.rio.clip(aoi_wgs.geometry.values, aoi_wgs.crs)

# -----Plot the clipped DEM with the AOI outline and the sample points on top
fig, ax = plt.subplots()
dem_extent = (np.min(dem_clip.x.data), np.max(dem_clip.x.data),
              np.min(dem_clip.y.data), np.max(dem_clip.y.data))
dem_im = ax.imshow(dem_clip.data[0], cmap='terrain', extent=dem_extent)
fig.colorbar(dem_im, ax=ax, label='Elevation [m]', shrink=0.7)
aoi_wgs.plot(ax=ax, facecolor='None', edgecolor='k')
line_lons, line_lats = line_wgs.geometry[0].coords.xy
ax.plot(line_lons, line_lats, '.-m', label='Sample points')
ax.legend(loc='best')
plt.show()

## Load and calculate ERA5-Land ellipsoid heights, reproject to the geoid

In [None]:
# The path to the geopotential file (era_geo_fn) is defined once, in the
# "Define paths in directory" section.

# Standard acceleration of gravity [m s-2]; ECMWF specifies this value for
# converting geopotential [m2 s-2] to height [m]
g0 = 9.80665

# Load ERA5-Land geopotential and convert it to reference elevations
era_geo = xr.open_dataset(era_geo_fn)
era_geo = era_geo / g0

# Plot ellipsoid heights
# The extent is hard-coded to 0-360 degrees longitude, hence the printed note.
print("Note: Longitude degree values are incorrect \n Xarray won't let me make coordinates go from positive -> negative values")
plt.figure(figsize=(10,6))
plt.imshow(era_geo.z.data[0], extent=(0,360,-90,90), cmap='terrain')
plt.title('Ellipsoid heights calculated from ERA5-Land geopotential')
plt.colorbar(label='meters', shrink=0.5)
plt.grid()
plt.show()

## Query GEE for ERA5-Land data, save to file as CSVs for each band

In [None]:
def sample_image_at_points(image):
    """Sample `image` at the current sample point.

    The location is read from the module-level variable `point`, which must
    be set by the caller before this function is used (the query loop sets it
    on every iteration).

    Parameters
    ----------
    image: ee.Image
        image to sample; intended to be used with ee.ImageCollection.map()

    Returns
    ----------
    ee.FeatureCollection
        result of sampleRegions at a 500 m scale, with geometries retained
    """
    sample_site = ee.Geometry.Point(point)
    samples = image.sampleRegions(collection=sample_site,
                                  scale=500,
                                  geometries=True)
    return ee.FeatureCollection(samples)

# -----Convert line to a list of (lon, lat) points
points = list(zip(line_wgs.geometry[0].coords.xy[0], 
                  line_wgs.geometry[0].coords.xy[1]))

# -----Query GEE for ERA5-Land image collection
era5_land = (ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR')
             .filterDate(start_date, end_date)
             .filterBounds(aoi_wgs_ee))

# -----Make sure the output folder exists before anything is written to it
os.makedirs(out_path, exist_ok=True)

# -----Iterate over bands 
for band in bands:
    print('\n' + band)

    # Check if data already exist in file
    out_fn = os.path.join(out_path, f'{site_name}_ERA5-Land_{start_date}_{end_date}_{band}.csv')
    if os.path.exists(out_fn):
        print('Band data already exist in file, skipping...')
        continue
    
    # Collect one row (dict) per point: lon, lat, then one value per date.
    # Building the table once from a list of rows avoids concatenating a
    # DataFrame for every single date of every point.
    band_rows = []
    
    # Iterate over points
    # NOTE: the loop variable must be named `point` because
    # sample_image_at_points() reads it as a module-level variable.
    for point in tqdm(points):
        band_fc = era5_land.select(band).map(sample_image_at_points).flatten().getInfo()
        features = band_fc['features']
        if not features:
            continue
        lon, lat = features[0]['geometry']['coordinates'][:2]
        row = {'lon': lon, 'lat': lat}
        for feature in features:
            # The first 8 characters of the feature ID are the date as YYYYMMDD
            date = feature['id'][0:8]
            row[f'{date[0:4]}-{date[4:6]}-{date[6:8]}'] = feature['properties'][band]
        band_rows.append(row)

    band_df = pd.DataFrame(band_rows)

    # Nothing sampled: do not write an empty file, because an existing file
    # makes this band get skipped on every later run.
    if band_df.empty:
        print('No data returned for band, nothing saved:', band)
        continue

    # Save to file
    band_df.to_csv(out_fn, index=False)
    print('Band data saved to file:', out_fn)

    # Plot one time series per point (all columns after lon and lat)
    fig, ax = plt.subplots()
    band_df[band_df.columns[2:]].transpose().plot(ax=ax)
    ax.set_title(band)
    plt.show()


## Adjust air temperatures for elevation

In [None]:
# Load the saved 2 m air temperature table (columns: lon, lat, then one column per date)
temp_fn = os.path.join(out_path,  f'{site_name}_ERA5-Land_{start_date}_{end_date}_temperature_2m.csv')
temp_df = pd.read_csv(temp_fn)

# Date columns are everything after lon and lat. They are captured here,
# before the elevation columns are added, instead of being matched by a
# substring of the column name.
columns = list(temp_df.columns[2:])

# Convert from Kelvin to Celsius
temp_df[columns] = temp_df[columns] - 273.15

# Sample elevations from DEM and ERA5-Land ellipsoid heights at each point
point_coords = temp_df[['lon', 'lat']].values
temp_df['elevation_DEM'] = [dem.sel(x=x, y=y, method='nearest').data[0] 
                            for (x, y) in point_coords]
# The geopotential file is sampled with 0-360 degree longitudes; the modulo maps
# negative (western) longitudes into that range and leaves positive ones unchanged.
temp_df['elevation_ERA'] = [era_geo.sel(longitude=x % 360, latitude=y, method='nearest').z.data[0] 
                            for (x, y) in point_coords]

# Adjust temperatures for elevation at each point:
# T_adjusted = T - lapse_rate [deg C / km] * (z_DEM - z_ERA) [m] / 1e3
elevation_offset_km = (temp_df['elevation_DEM'] - temp_df['elevation_ERA']) / 1e3
temp_df[columns] = temp_df[columns].sub(lapse_rate * elevation_offset_km, axis=0)

# Plot
fig, ax = plt.subplots(figsize=(8,5))
temp_df[columns].transpose().plot(ax=ax)
ax.set_title('Adjusted air temperature')
# raw string so that the backslash in \circ is not treated as an escape sequence
ax.set_ylabel(r'Temperature [$^{\circ}$C]')
plt.show()

# Save to file
# NOTE(review): unlike the per-band CSVs (saved with index=False), this also
# writes the row index as the first column - confirm whether that is intended.
out_fn = temp_fn.replace('2m.csv', '2m_Celsius_adjusted.csv')
temp_df.to_csv(out_fn)
print('Adjusted air temperatures saved to file:', out_fn)