**Step 1: Download ERA5 2-m temperature or total precipitation from the CDS website.**

In [None]:
%matplotlib inline
import xarray as xr
import numpy as np 
import matplotlib.pyplot as plt
import os
import pandas as pd
from datetime import datetime, timedelta, date

import dask
import dask.array as dda
import dask.distributed as dd

# rhodium-specific kubernetes cluster configuration
import rhg_compute_tools.kubernetes as rhgk

In [None]:
 # install copernicus API to create client instances
# !pip install cdsapi 
import cdsapi 

In [None]:
# Start the compute cluster through the rhodium kubernetes helper; the call
# returns a (client, cluster) pair that the later cells use to submit work.
# NOTE(review): extra_pip_packages='cdsapi' presumably installs cdsapi on the
# workers so they can run the download function -- confirm.
client, cluster = rhgk.get_micro_cluster(extra_pip_packages='cdsapi')
# Left disabled; presumably scales the cluster down to zero workers -- confirm.
# cluster.scale(0)

In [None]:
# Bare expression: the notebook renders the cluster object as this cell's output.
cluster

In [None]:
def retrieve_daily_era5(spec):
    '''
    Download one day of hourly ERA-5 data unless the target file already exists.

    inputs: spec, a single tuple (one argument so the function can be mapped
    over a list of jobs) holding, in this order:
        c: client object exposing retrieve(dataset_name, request, target_path)
        variable: 'tas' or 'precip'
        file_format: 'netcdf' or 'grib'
        grid: value sent as the 'grid' key of the request
        year, month, day: sent unchanged in the request and used in the filename
        hours: value sent as the 'time' key of the request
    output: path (str) of the daily file, whether it was already present or
    was requested by this call
    raises: KeyError if variable or file_format is not one of the keys above
    '''
    c, variable, file_format, grid, year, month, day, hours = spec

    # short name used here -> variable name sent in the request
    variable_names = {'tas': '2m_temperature', 'precip': 'total_precipitation'}
    # short name used here -> prefix of the output filename
    filenames = {'tas': 't2m', 'precip': 'total_precip'}
    file_extensions = {'netcdf': 'nc', 'grib': 'grib'}

    output_dir = '/gcs/impactlab-data/climate/source_data/ERA-5/%s/hourly/%s' % (variable, file_format)
    filename = '%s_%s_%s_%s.%s' % (filenames[variable], year, month, day, file_extensions[file_format])
    filepath = os.path.join(output_dir, filename)

    # Skip the request when the file is already on disk.
    # NOTE(review): only existence is checked, so a truncated file left behind by
    # an interrupted download would be treated as complete -- confirm whether
    # such files need to be cleaned up before re-running.
    if os.path.isfile(filepath):
        return filepath

    request = {
        'product_type': 'reanalysis',
        'variable': variable_names[variable],
        'year': year,
        'month': month,
        'day': day,
        'time': hours,
        'format': file_format,
        'grid': grid,
    }
    c.retrieve('reanalysis-era5-single-levels', request, filepath)
    return filepath

Note: more information on the ERA-5 grid configurations is available here: https://confluence.ecmwf.int/display/CKB/ERA5%3A+What+is+the+spatial+reference#ERA5:Whatisthespatialreference-Gridresolution

In [None]:
# Full ERA-5 record, kept for reference (disabled):
# era_start = '01-01-1979'
# era_end = '12-31-2020'
# adjust for bias correction reference period
era_start = '01-01-1994'
era_end = '01-01-2016'
# make list of daily datetime indices, this includes leap years
# (date_range includes both endpoints, so the end date itself is in the index)
dt_index_full = pd.date_range(start=era_start, end=era_end, freq='D')

# reformat month/day for the retrieval function: years as strings, months and
# days as zero-padded two-character strings
dt_index_years = dt_index_full.year.astype(str)
dt_index_months = dt_index_full.month.map("{:02}".format)
dt_index_days = dt_index_full.day.map("{:02}".format)
# day-of-year for each date; not used by the cells visible here
daynum = dt_index_full.dayofyear

# make list of hours for retrieval function: '00:00' through '23:00'.
# Built with plain formatting rather than an hourly pandas date_range, which
# relied on the 'H' frequency alias that newer pandas deprecates.
hours = ['{:02d}:00'.format(hr) for hr in range(24)]

# set grid
# the 'F320' grid is the regular Gaussian corresponding to the reduced Gaussian N320 that the native ERA-5 atmosphere
# model is on
grid = 'F320'

# variable to download: 'tas' or 'precip'
variable = 'precip'

# set file format: can be grib or netcdf
file_format = 'netcdf'

In [None]:
# One cdsapi client per daily job, so the list lines up element-for-element
# with the date indices when the jobs are zipped together.
c_list = [cdsapi.Client() for _ in dt_index_days]

In [None]:
# One job tuple per day, laid out in the order retrieve_daily_era5 unpacks it:
# (client, variable, file_format, grid, year, month, day, hours).
# The variable, format, grid and hour list are the same for every job.
JOBS = [
    (api_client, variable, file_format, grid, yr, mon, dy, hours)
    for api_client, yr, mon, dy in zip(
        c_list, dt_index_years, dt_index_months, dt_index_days
    )
]

In [None]:
# Batch 1: submit jobs 0-1499 to the cluster and display a progress bar.
# The jobs are submitted in fixed slices of 1500, one slice per cell.
futures_1 = client.map(retrieve_daily_era5, JOBS[:1500])
dd.progress(futures_1)

In [None]:
# Batch 2: submit jobs 1500-2999 and display a progress bar.
futures_2 = client.map(retrieve_daily_era5, JOBS[1500:3000])
dd.progress(futures_2)

In [None]:
# Batch 3: submit jobs 3000-4499 and display a progress bar.
futures_3 = client.map(retrieve_daily_era5, JOBS[3000:4500])
dd.progress(futures_3)

In [None]:
# Batch 4: submit jobs 4500-5999 and display a progress bar.
futures_4 = client.map(retrieve_daily_era5, JOBS[4500:6000])
dd.progress(futures_4)

In [None]:
# Batch 5: submit jobs 6000-7499 and display a progress bar.
futures_5 = client.map(retrieve_daily_era5, JOBS[6000:7500])
dd.progress(futures_5)

In [None]:
# Batch 6: submit jobs 7500-8999 and display a progress bar.
# A slice that runs past the end of JOBS just yields fewer (or zero) jobs.
futures_6 = client.map(retrieve_daily_era5, JOBS[7500:9000])
dd.progress(futures_6)

In [None]:
# Batch 7: submit jobs 9000-10499 and display a progress bar.
# The slice is empty when JOBS has 9000 or fewer entries.
futures_7 = client.map(retrieve_daily_era5, JOBS[9000:10500])
dd.progress(futures_7)

In [None]:
# Batch 8: submit jobs 10500-11999 and display a progress bar.
# The slice is empty when JOBS has 10500 or fewer entries.
futures_8 = client.map(retrieve_daily_era5, JOBS[10500:12000])
dd.progress(futures_8)

In [None]:
# Batch 9: submit jobs 12000-13499 and display a progress bar.
# The slice is empty when JOBS has 12000 or fewer entries.
futures_9 = client.map(retrieve_daily_era5, JOBS[12000:13500])
dd.progress(futures_9)

In [None]:
# Batch 10: submit every remaining job from index 13500 onward and display a
# progress bar. The slice is empty when JOBS has 13500 or fewer entries.
futures_10 = client.map(retrieve_daily_era5, JOBS[13500:])
dd.progress(futures_10)