**Step 1: Download ERA5 2-m temperature from the CDS website**

In [1]:
%matplotlib inline
import xarray as xr
import numpy as np 
import matplotlib.pyplot as plt
import os
import pandas as pd
from datetime import datetime, timedelta, date

import dask
import dask.array as dda
import dask.distributed as dd

# rhodium-specific kubernetes cluster configuration
import rhg_compute_tools.kubernetes as rhgk

In [2]:
 # install copernicus API to create client instances
!pip install cdsapi 
import cdsapi 



In [3]:
# Get a client/cluster pair from the rhg_compute_tools helper, passing cdsapi as
# an extra pip package (presumably so workers can import it -- TODO confirm),
# then scale the cluster to the requested size.
n_workers = 30
client, cluster = rhgk.get_big_cluster(extra_pip_packages='cdsapi')
cluster.scale(n_workers)

In [4]:
# Display the client summary (scheduler address, dashboard link and the
# current worker / core / memory counts of the cluster).
client

0,1
Client  Scheduler: gateway://traefik-impactlab-hub-dask-gateway.impactlab-hub:80/impactlab-hub.b4e5b9c3a5e643518d0f7cc7c247985e  Dashboard: /services/dask-gateway/clusters/impactlab-hub.b4e5b9c3a5e643518d0f7cc7c247985e/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [10]:
# Close the cluster.
# NOTE(review): this cell carries execution count 10 although it sits above the
# download cells; run top-to-bottom it would close the cluster before any of
# the client.map(...) submissions below -- confirm it is meant to be run last.
cluster.close()

In [5]:
def retrieve_daily_era5(spec):
    '''
    Retrieve one day of hourly ERA-5 2-m temperature and save it as netCDF.

    inputs: spec, a tuple (c, year, month, day, hours) with
        c     -- client object exposing retrieve(name, request, target)
                 (the caller builds these with cdsapi.Client())
        year  -- year (str)
        month -- month (str)
        day   -- day (str)
        hours -- list of hours in a day, sent as the request's 'time' field
    output: None; the daily file with hourly ERA-5 data is written to
        <era5_tmp_dir>/t2m_<year>_<month>_<day>.nc
    '''

    c, year, month, day, hours = spec

    era5_tmp_dir = '/gcs/impactlab-data/climate/source_data/ERA-5/hourly'
    # The request below asks for 'format': 'netcdf', so the target file gets a
    # .nc extension (a .grib name would mislabel the netCDF content).
    filename = 't2m_%s_%s_%s.nc' % (year, month, day)
    filepath = os.path.join(era5_tmp_dir, filename)

    request = {
        'product_type': 'reanalysis',
        'variable': '2m_temperature',
        'year': year,
        'month': month,
        'day': day,
        'time': hours,
        'format': 'netcdf'
    }
    c.retrieve('reanalysis-era5-single-levels', request, filepath)

In [6]:
# first and last day of the download period
era_start = '01-01-1979'
era_end = '12-31-2020'
# make list of daily datetime indices, this includes leap years
dt_index_full = pd.date_range(start=era_start, end=era_end, freq='D')

# reformat year/month/day as strings for the retrieval function
# (month and day are zero-padded to two digits)
dt_index_years = dt_index_full.year.astype(str)
dt_index_months = dt_index_full.month.map("{:02}".format)
dt_index_days = dt_index_full.day.map("{:02}".format)
daynum = dt_index_full.dayofyear

# make list of hours for retrieval function: "00:00", "01:00", ..., "23:00".
# Built directly instead of via pd.date_range(..., freq='H'), because the 'H'
# offset alias is deprecated in recent pandas releases.
hours = ["{:02d}:00".format(hr) for hr in range(24)]

In [7]:
# Build one cdsapi.Client() per daily timestamp, so that every job tuple
# created from this list carries its own client object.
c_list = [cdsapi.Client() for _ in dt_index_days]

In [8]:
# Each job is the tuple unpacked by retrieve_daily_era5:
# (client, year, month, day, hours). Every job shares the same `hours` list.
daily_specs = zip(c_list, dt_index_years, dt_index_months, dt_index_days)
JOBS = [daily + (hours,) for daily in daily_specs]

In [20]:
# Batch 1: submit the retrievals for JOBS[:1500] and show their progress.
jobs_batch_1 = JOBS[:1500]
futures_1 = client.map(retrieve_daily_era5, jobs_batch_1)
dd.progress(futures_1)

VBox()

In [21]:
# Batch 2: submit the retrievals for JOBS[1500:3000] and show their progress.
jobs_batch_2 = JOBS[1500:3000]
futures_2 = client.map(retrieve_daily_era5, jobs_batch_2)
dd.progress(futures_2)

VBox()

In [22]:
# Batch 3: submit the retrievals for JOBS[3000:4500] and show their progress.
jobs_batch_3 = JOBS[3000:4500]
futures_3 = client.map(retrieve_daily_era5, jobs_batch_3)
dd.progress(futures_3)

VBox()

In [23]:
# Batch 4: submit the retrievals for JOBS[4500:6000] and show their progress.
jobs_batch_4 = JOBS[4500:6000]
futures_4 = client.map(retrieve_daily_era5, jobs_batch_4)
dd.progress(futures_4)

VBox()

In [11]:
# Batch 5: submit the retrievals for JOBS[6000:7500] and show their progress.
jobs_batch_5 = JOBS[6000:7500]
futures_5 = client.map(retrieve_daily_era5, jobs_batch_5)
dd.progress(futures_5)

VBox()

In [None]:
# Batch 6: submit the retrievals for JOBS[7500:9000] and show their progress.
jobs_batch_6 = JOBS[7500:9000]
futures_6 = client.map(retrieve_daily_era5, jobs_batch_6)
dd.progress(futures_6)

In [None]:
# Batch 7: submit the retrievals for JOBS[9000:10500] and show their progress.
jobs_batch_7 = JOBS[9000:10500]
futures_7 = client.map(retrieve_daily_era5, jobs_batch_7)
dd.progress(futures_7)

In [None]:
# Batch 8: submit the retrievals for JOBS[10500:12000] and show their progress.
jobs_batch_8 = JOBS[10500:12000]
futures_8 = client.map(retrieve_daily_era5, jobs_batch_8)
dd.progress(futures_8)

In [None]:
# Batch 9: submit the retrievals for JOBS[12000:13500] and show their progress.
jobs_batch_9 = JOBS[12000:13500]
futures_9 = client.map(retrieve_daily_era5, jobs_batch_9)
dd.progress(futures_9)

In [None]:
# Batch 10 (last): submit the remaining retrievals, JOBS[13500:], and show
# their progress.
jobs_batch_10 = JOBS[13500:]
futures_10 = client.map(retrieve_daily_era5, jobs_batch_10)
dd.progress(futures_10)

In [1]:
# Check download output: list the files in the ERA-5 hourly download directory
! ls /gcs/impactlab-data/climate/source_data/ERA-5/hourly

grib_files	   t2m_1989_10_01.nc  t2m_2000_07_02.nc  t2m_2011_04_03.nc
t2m_1979_01_01.nc  t2m_1989_10_02.nc  t2m_2000_07_03.nc  t2m_2011_04_04.nc
t2m_1979_01_02.nc  t2m_1989_10_03.nc  t2m_2000_07_04.nc  t2m_2011_04_05.nc
t2m_1979_01_03.nc  t2m_1989_10_04.nc  t2m_2000_07_05.nc  t2m_2011_04_06.nc
t2m_1979_01_04.nc  t2m_1989_10_05.nc  t2m_2000_07_06.nc  t2m_2011_04_07.nc
t2m_1979_01_05.nc  t2m_1989_10_06.nc  t2m_2000_07_07.nc  t2m_2011_04_08.nc
t2m_1979_01_06.nc  t2m_1989_10_07.nc  t2m_2000_07_08.nc  t2m_2011_04_09.nc
t2m_1979_01_07.nc  t2m_1989_10_08.nc  t2m_2000_07_09.nc  t2m_2011_04_10.nc
t2m_1979_01_08.nc  t2m_1989_10_09.nc  t2m_2000_07_10.nc  t2m_2011_04_11.nc
t2m_1979_01_09.nc  t2m_1989_10_10.nc  t2m_2000_07_11.nc  t2m_2011_04_12.nc
t2m_1979_01_10.nc  t2m_1989_10_11.nc  t2m_2000_07_12.nc  t2m_2011_04_13.nc
t2m_1979_01_11.nc  t2m_1989_10_12.nc  t2m_2000_07_13.nc  t2m_2011_04_14.nc
t2m_1979_01_12.nc  t2m_1989_10_13.nc  t2m_2000_07_14.nc  t2m_2011_04_15.nc
t2m_1979_01_13.nc  t2m_1989_10