## Data Downloader for ERA5 and GloFAS data

Defines helper functions that download ERA5 data through the CDS API (`cdsapi`), one month per request.

In [None]:
import os
from os.path import join as path

import cdsapi

In [None]:
def era5_retrieval(savename='era5', base_level='pressure', form='netcdf', area='50/7/47/20',
                   variable='geopotential', pressure_level='700', time_start='2017:05', time_end='2017:07'):
    """Download ERA5 data month by month, skipping months that already exist on disk.

    One request is issued per calendar month between ``time_start`` and
    ``time_end`` (both inclusive); requesting on a monthly basis is the most
    efficient granularity for the data service.

    Parameters
    ----------
    savename : str
        Path prefix of the output files. Each month is expected at
        ``f'{savename}_{YYYY}_{MM}.nc'``, which is the name ``request_data``
        writes to.
    base_level : str
        Forwarded to ``request_data`` ('pressure' or 'single').
    form : str
        Output format, forwarded to ``request_data``.
    area : str
        Bounding box as 'N/W/S/E', forwarded to ``request_data``.
    variable : str or list of str
        Variable name(s), forwarded to ``request_data``.
    pressure_level : str or list of str
        Pressure level(s), forwarded to ``request_data``.
    time_start, time_end : str
        First and last month of the range in the format 'YYYY:MM'.

    Raises
    ------
    ValueError
        If ``time_start`` or ``time_end`` is not of the form 'YYYY:MM'.
    """
    start_year, start_month = (int(part) for part in time_start.split(':'))
    end_year, end_month = (int(part) for part in time_end.split(':'))

    # Count months on a single linear axis so that ranges crossing a year
    # boundary (e.g. 2017:11 -> 2018:02) visit exactly the months in between.
    first = start_year * 12 + (start_month - 1)
    last = end_year * 12 + (end_month - 1)

    for month_index in range(first, last + 1):
        year, zero_based_month = divmod(month_index, 12)
        year_str = str(year)
        month_str = f'{zero_based_month + 1:02d}'

        # Only start a request if this month's file does not exist yet.
        if os.path.isfile(f'{savename}_{year_str}_{month_str}.nc'):
            continue

        request_data(savename=savename, base_level=base_level, form=form, area=area,
                     variable=variable, pressure_level=pressure_level,
                     year=year_str, month=month_str)

def request_data(savename='era5_example', base_level='pressure', form='netcdf', area='50/7/47/20',
                 variable='geopotential', pressure_level='700', year='2000', month='12'):
    """Request one month of hourly ERA5 reanalysis data and save it to disk.

    The request covers days "01" to "31" and hours "00" to "23" of the given
    month and is sent to the dataset ``reanalysis-era5-{base_level}-levels``.
    The result is written to ``f'{savename}_{year}_{month}.nc'``.

    Parameters
    ----------
    savename : str
        Path prefix of the output file.
    base_level : str
        'pressure' or 'single'; selects the dataset. ``pressure_level`` is
        only sent for 'pressure'.
    form : str
        Value of the request's "format" field.
    area : str
        Bounding box as 'N/W/S/E'.
    variable : str or list of str
        Variable name(s) to request.
    pressure_level : str or list of str
        Pressure level(s) to request (ignored for ``base_level='single'``).
    year, month : str
        Year and month of the request, e.g. '2000' and '12'.

    Raises
    ------
    ValueError
        If ``base_level`` is neither 'pressure' nor 'single'.
    """
    # Validate first: an unknown level would otherwise silently download nothing.
    if base_level not in ('pressure', 'single'):
        raise ValueError(
            f"base_level must be 'pressure' or 'single', got {base_level!r}")

    request = {
        "product_type": "reanalysis",
        "format": form,
        "area": area,  # N/W/S/E
        "variable": variable,
        "year": year,
        "month": month,
        "day": [f'{day:02d}' for day in range(1, 32)],
        "time": [f'{hour:02d}' for hour in range(24)],
    }
    if base_level == 'pressure':
        request["pressure_level"] = pressure_level

    target = f'{savename}_{year}_{month}.nc'

    # Make sure the output directory exists before the request is queued, so
    # a finished request cannot fail merely because the folder is missing.
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    client = cdsapi.Client()
    client.retrieve(f"reanalysis-era5-{base_level}-levels", request, target)

### download request for the data of interest on pressure levels

# Note: check that the variables `areastr` and `matnr` are set correctly before executing

In [None]:
# change vars
areastr = 'danube'  # key into area_dict below
matnr = 'a1303583'  # user directory name used in path_to_repo


# define areas of interest as 'N/W/S/E' bounding boxes
# NOTE(review): the 'asia' box runs from W=-140 to E=35 - confirm the
# west/east values are not swapped.
area_dict = {'danube': '50/7/47/20',
             'asia': '55/-140/0/35',
             'usa': '50/-125/25/-70'
            }
# choose area: 'danube', 'asia', 'usa'
area = area_dict[areastr]

# variables (optionally also 'specific humidity')
variable = ['geopotential', 'temperature']

# pressure levels
base_level = 'pressure' # 'pressure' or 'single'
pressure_level = ['850', '700', '500']


# define time range: start end in the format YYYY:MM
time_start = '1981:01' # full range start: '1981:01'
time_end = '2017:12' # full range end: '2017:12'

# create savename string: spaces in variable names become underscores so the
# file name contains no blanks
variablestr = "_".join(x.replace(' ', '_') for x in variable)
pressure_levelstr = "_".join(pressure_level)
# NOTE(review): this path is relative ('home/...'); if the repository lives
# under '/home/...', a leading os.sep is needed - confirm.
path_to_repo = os.path.join('home', 'srvx11', 'lehre', 'users', matnr, 'ipython', 'ml_flood')
savename = os.path.join(path_to_repo, 'data', areastr, f'era5_{variablestr}_{pressure_levelstr}')

In [None]:
# start the month-by-month ERA5 download using the settings chosen above
era5_retrieval(
    savename=savename,
    base_level=base_level,
    area=area,
    variable=variable,
    pressure_level=pressure_level,
    time_start=time_start,
    time_end=time_end,
)