# Load ERA5-Land air temperature data, save as .csv at each study site

In [None]:
!pip install wxee

In [None]:
import ee
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import wxee
import numpy as np
import os
import glob

In [None]:
# If using Google Colab, mount Google Drive so you can access your Drive folders.
# The google.colab package cannot be imported outside Colab, so in that case the
# mount is skipped instead of stopping the notebook with an ImportError.
try:
  from google.colab import drive
except ImportError:
  # Not in Colab: nothing to mount
  IN_COLAB = False
  print('Not running in Google Colab: skipping Google Drive mount')
else:
  IN_COLAB = True
  drive.mount('/content/drive')

In [None]:
# Authenticate and initialize Google Earth Engine.
# Try to initialize first; if that fails, run the authentication flow and
# initialize again.
try:
  ee.Initialize()
except Exception:
  # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt and
  # SystemExit propagate instead of launching the authentication flow.
  ee.Authenticate()
  ee.Initialize()

In [None]:
# Path to the 'snow_cover_mapping_application/study-sites/' folder
# (written as a relative path, with a trailing slash)
study_sites_path = (
    'drive/MyDrive/Research/PhD/'
    'snow_cover_mapping/snow_cover_mapping_application/'
    'study-sites/'
)

In [None]:
# Uncomment and use this to locate the folder above, if needed
# os.listdir('drive/MyDrive/Research/PhD/snow_cover_mapping/snow_cover_mapping_application/study-sites/')

In [None]:
# Make the study-sites folder the working directory, then collect the study site names
os.chdir(study_sites_path)
# The '*/' pattern matches the sub-folders of the working directory;
# each match ends with a path separator, which is dropped to leave the bare name
site_dirs = glob.glob('*/')
site_names = sorted(site_dir[:-1] for site_dir in site_dirs)
site_names

In [None]:
# Build one ERA5 query window per year (each year is queried separately):
# the start dates fall on May 1 and the end dates on November 1, for 2010 through 2022
years = np.arange(2010, 2023).astype(str)
date_starts = [f'{yr}-05-01' for yr in years]
date_ends = [f'{yr}-11-01' for yr in years]

# Loop through each study site.
# For every site: query ERA5-Land daily 2 m air temperature over the glacier's
# bounding box, average it spatially for each day, accumulate positive
# degree-days (PDDs) within each year, and save the result as a .csv file.
# Other sites that can be added to the list below:
#   'Blue', 'Boulder', 'Carbon', 'Coleman',
#   'RGI60-01.00037', 'RGI60-01.00038', 'RGI60-01.00046',
#   'RGI60-01.00312', 'RGI60-01.00566'
for site_name in ['Emmons']:

  print(site_name)

  # Load AOI (glacier outline); skip the site with a clear message when no
  # outline shapefile exists instead of failing with an IndexError
  AOI_fns = glob.glob(site_name + '/AOIs/*_outline.shp')
  if not AOI_fns:
    print('No *_outline.shp file found in ' + site_name + '/AOIs/, skipping site')
    print(' ')
    continue
  AOI_fn = AOI_fns[0]
  AOI = gpd.read_file(AOI_fn)
  # reproject to WGS84
  AOI_WGS = AOI.to_crs('EPSG:4326')

  # Reformat AOI for GEE querying: closed ring around the bounding box of the
  # first feature in the outline file
  minx, miny, maxx, maxy = (float(v) for v in AOI_WGS.geometry.bounds.iloc[0])
  region = ee.Geometry.Polygon([[[minx, miny],
                                 [maxx, miny],
                                 [maxx, maxy],
                                 [minx, maxy],
                                 [minx, miny]]])

  # Loop through date ranges (much faster for downloading!)
  # One dataframe per date range is collected and concatenated once at the end,
  # which avoids re-copying the growing dataframe for every single day
  yearly_dfs = []
  for date_start, date_end in zip(date_starts, date_ends):

    print(date_start, date_end)

    # Load ERA5-Land Daily Aggregate product
    era = (ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
           .filterDate(ee.Date(date_start), ee.Date(date_end)))

    # select air temperature band, clip to AOI
    era_clip = era.select('temperature_2m').map(lambda im: im.clip(region))

    # convert to xarray.Dataset
    # ground resolution = 11132 m
    era_xr = era_clip.wx.to_xarray(scale=11132, region=region)

    # mean air temperature for each time step: average over every dimension
    # except time, ignoring NaN cells
    temp_2m = era_xr['temperature_2m']
    spatial_dims = [dim for dim in temp_2m.dims if dim != 'time']
    daily_mean_K = temp_2m.mean(dim=spatial_dims, skipna=True)

    yearly_dfs.append(pd.DataFrame({'Date': pd.to_datetime(era_xr['time'].values),
                                    'Mean 2m air temp. [K]': daily_mean_K.values}))

  air_temp_df = pd.concat(yearly_dfs, ignore_index=True)

  # Convert to degrees Celsius (0 degrees C = 273.15 K) and set temperatures
  # below freezing to 0 so that only positive degrees are accumulated
  positive_degrees_C = (air_temp_df['Mean 2m air temp. [K]'] - 273.15).clip(lower=0)
  # calculate PDDs: running sum of positive degrees, restarted every calendar year
  air_temp_df['PDD'] = positive_degrees_C.groupby(air_temp_df['Date'].dt.year).cumsum()
  # keep only the output columns
  air_temp_df = air_temp_df[['Date', 'Mean 2m air temp. [K]', 'PDD']]

  # define out_path
  out_path = site_name + '/ERA/'

  # create out_path if it doesn't exist
  os.makedirs(out_path, exist_ok=True)

  # save to file as .csv
  out_fn = site_name + '_ERA5_air_temp_' + date_starts[0] + '_' + date_ends[-1] + '.csv'
  air_temp_df.to_csv(out_path + out_fn, index=False)
  print('Mean ERA5 air temperature saved to file: ' + out_path + out_fn)
  print(' ')