# Downloading ERA5 Dataset

In [None]:
import os
import sys
import glob
import cdsapi
import datetime
import calendar
import numpy as np
import xarray as xr
from pathlib import Path
from datetime import timedelta

# Make the local package imports resolvable: the directory three levels above
# the current working directory is appended to the Python path (only once).
_package_root = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
if _package_root not in sys.path:
    sys.path.append(_package_root)
    
from wp4.constants import DATA_DIR_ERA5, CDS_URL, CDS_KEY

import warnings

# The cdsapi client triggers an SSL warning when the url and key are passed
# explicitly as parameters; suppress it by matching the start of its message.
warnings.filterwarnings(
    action="ignore",
    message="Unverified HTTPS request is being made to host ",
)

In [None]:
# Bounding box (in degrees) of the region to download; it is passed unchanged as
# the 'area' parameter of the CDS request.
# NOTE(review): the values read as [North, West, South, East], which is the order
# the CDS API documents for 'area' - confirm when changing the region.
AREA_OF_INTEREST = [
    55.65,
    -11.35,
    51.35,
    -5.25,
]

# CDS dataset to download: 'reanalysis-era5-land' or 'reanalysis-era5-single-levels'
DATASET = 'reanalysis-era5-single-levels'

# Make sure the directory for the per-month files exists. exist_ok=True makes this
# a no-op when the directory is already there, so no separate existence check
# (and no gap between checking and creating) is needed.
os.makedirs(Path(DATA_DIR_ERA5).joinpath('per_month'), exist_ok=True)

In [None]:
# initiate API client
c = cdsapi.Client(
    url=CDS_URL,
    key=CDS_KEY
)

# directory the per-month files are written to (one '<year>_<month>.nc' per request)
per_month_dir = Path(DATA_DIR_ERA5).joinpath('per_month')

# every full hour of the day: '00:00', '01:00', ..., '23:00' (same for every request)
hours_param = [f'{hour:02d}:00' for hour in range(24)]

for year in [2021]:  # 2015, 2016, 2017, 2018, 2019, 2020,
    for month in ['01', '02',]: # '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'

        # a month that has not started yet cannot be requested: stop with the
        # remaining months of this year
        if datetime.date(year=year, month=int(month), day=1) > datetime.date.today():
            print('Download process completed')
            break

        # get the days in the month
        days_in_month = calendar.monthrange(year, int(month))[1]

        # zero pad the days
        days_param = [str(x).zfill(2) for x in range(1, days_in_month + 1)]

        params = {
            'format': 'netcdf',
            'grid': [0.1, 0.1],
            'variable': [
                '10m_u_component_of_wind',
                '10m_v_component_of_wind',
                '2m_temperature',
                'total_precipitation',
            ],
            'year': f'{year}',
            'month': month,
            'day': days_param,
            'time': hours_param,
            'area': AREA_OF_INTEREST,
        }

        if DATASET == 'reanalysis-era5-single-levels':
            # for this dataset an additional parameter is required
            params['product_type'] = 'reanalysis'

        # send the request to the server; any error raised by the client is
        # deliberately left to propagate so a failed download stops the run
        c.retrieve(
            DATASET,
            params,
            per_month_dir.joinpath(f'{year}_{month}.nc').as_posix()
        )

In [None]:
# directory holding the per-month downloads
per_month_dir = Path(DATA_DIR_ERA5).joinpath('per_month')

# get all the filenames ending on nc in the download directory
# (sorted so the files are always processed in the same order)
monthly_era5_files = sorted(glob.glob(f'{per_month_dir.as_posix()}/*.nc'))

# fail with a clear message instead of an obscure error further down
if not monthly_era5_files:
    raise FileNotFoundError(f'No monthly ERA5 files (*.nc) found in {per_month_dir}')

# read each nc file into memory using xarray; load_dataset closes the file
# again after reading, so no file handles are left open
monthly_era5_datasets = [xr.load_dataset(x) for x in monthly_era5_files]

# combine the data along the time dimension using the xarray combine_nested function
ds = xr.combine_nested(monthly_era5_datasets, concat_dim='time', combine_attrs='drop_conflicts')

# sort the data based on date
sorted_ds = ds.sortby('time')

# save as a single netcdf file
if DATASET == 'reanalysis-era5-single-levels':
    # ERA5 has an extra dimension 'expver' as a result of data from the ERA5T (near real time)
    # dataset being used before the reanalysis data become available.
    # The following lines get rid of this extra dimension: values for expver=1 are kept
    # and any gaps in them are filled with the values for expver=5.
    if 'expver' in ds.dims:
        sorted_ds = sorted_ds.sel(expver=1).combine_first(sorted_ds.sel(expver=5))

    sorted_ds.to_netcdf(Path(DATA_DIR_ERA5).joinpath('_era5.nc'))

elif DATASET == 'reanalysis-era5-land':
    sorted_ds.to_netcdf(Path(DATA_DIR_ERA5).joinpath('_era5_land.nc'))