# Download ERA5 reanalysis from Copernicus Climate Data Store

- website: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels?tab=overview
- CDS data requests have limits in terms of number of fields and volume size. Therefore, each day is extracted as a single file.
- Period: 2016 - 2020 --> 1827 daily files per dataset (2016 and 2020 are leap years)
- Time to download: around 9h

Follow the instructions [here](https://cds.climate.copernicus.eu/api-how-to) to register, set up the CDS API key and install the Python package `cdsapi`.

In [None]:
# =======
# import packages

# to read netcdf (for quick check)
import xarray as xr
# to download from CDS (Copernicus Climate Data Store)
import cdsapi
# set path
from pathlib import Path
# to create dates (for loop)
from datetime import date, timedelta
# plotting
import matplotlib.pyplot as plt

In [None]:
# surface variables
#
# Downloads one netCDF file per day (hourly ERA5 single-level fields) into
# `datapath`. Days whose file is already present are skipped, so the cell can
# simply be re-run after an interruption.

# Output folder for the daily files. Kept as a string with a trailing slash
# because other cells build paths and glob patterns by concatenating onto it.
datapath = '../data/ERA5/'
start_date = date(2016,1,1)
end_date = date(2020,12,31)
delta = end_date - start_date

# Create the output folder up front; otherwise the very first download fails
# when it tries to write its target file.
Path(datapath).mkdir(parents=True, exist_ok=True)

# All 24 hourly time steps of a day: '00:00', '01:00', ..., '23:00'
hours = ['{:02d}:00'.format(hour) for hour in range(24)]

c = cdsapi.Client()
# loop through each day (start_date and end_date are both included)
for i in range(delta.days + 1):
    timestep = start_date + timedelta(days=i)
    year = str(timestep.year)
    month = str(timestep.month).zfill(2)
    day = str(timestep.day).zfill(2)
    filename = year + month + day + '_era5_surface_hourly.nc'
    print(filename)

    # checking if file already exists, else download ERA5 data
    my_file = Path(datapath) / filename
    if my_file.is_file():
        print(" File already exists!")
        continue

    # Download under a temporary name and rename only after retrieve() has
    # returned. An interrupted download then never leaves a truncated file
    # under the final name, which the check above would otherwise mistake
    # for a complete one on the next run.
    partial_file = my_file.with_name(filename + '.part')
    c.retrieve(
        'reanalysis-era5-single-levels',
        {
            'product_type': 'reanalysis',
            'format': 'netcdf',
            'variable': [
                '100m_u_component_of_wind', '100m_v_component_of_wind', '10m_u_component_of_wind',
                '10m_v_component_of_wind', '2m_dewpoint_temperature', '2m_temperature',
                'boundary_layer_height', 'surface_pressure', 'surface_sensible_heat_flux',
                'total_precipitation', 'forecast_surface_roughness',
            ],
            'year': year,
            'month': month,
            'day': day,
            'time': hours,
            # bounding box of the requested region
            # NOTE(review): presumably [North, West, South, East] -- confirm against the CDS API docs
            'area': [
                48.2, 5.2, 45.4,
                11.02,
            ],
        },
        str(partial_file))
    partial_file.replace(my_file)

In [None]:
# variables on 500 hPa
#
# Downloads one netCDF file per day (hourly ERA5 pressure-level fields at
# 500 hPa) into `datapath`. Days whose file is already present are skipped,
# so the cell can simply be re-run after an interruption.

# Output folder for the daily files. Kept as a string with a trailing slash
# because other cells build paths and glob patterns by concatenating onto it.
datapath = '../data/ERA5/'
start_date = date(2016,1,1)
end_date = date(2020,12,31)
delta = end_date - start_date

# Create the output folder up front; otherwise the very first download fails
# when it tries to write its target file.
Path(datapath).mkdir(parents=True, exist_ok=True)

# All 24 hourly time steps of a day: '00:00', '01:00', ..., '23:00'
hours = ['{:02d}:00'.format(hour) for hour in range(24)]

c = cdsapi.Client()
# loop through each day (start_date and end_date are both included)
for i in range(delta.days + 1):
    timestep = start_date + timedelta(days=i)
    year = str(timestep.year)
    month = str(timestep.month).zfill(2)
    day = str(timestep.day).zfill(2)
    filename = year + month + day + '_era5_z500_hourly.nc'
    print(filename)

    # checking if file already exists, else download ERA5 data
    my_file = Path(datapath) / filename
    if my_file.is_file():
        print(" File already exists!")
        continue

    # Download under a temporary name and rename only after retrieve() has
    # returned. An interrupted download then never leaves a truncated file
    # under the final name, which the check above would otherwise mistake
    # for a complete one on the next run.
    partial_file = my_file.with_name(filename + '.part')
    c.retrieve(
        'reanalysis-era5-pressure-levels',
        {
            'product_type': 'reanalysis',
            'format': 'netcdf',
            'variable': [
                'divergence', 'geopotential', 'u_component_of_wind',
                'v_component_of_wind', 'vertical_velocity', 'vorticity',
            ],
            'pressure_level': '500',
            'year': year,
            'month': month,
            'day': day,
            'time': hours,
            # bounding box of the requested region
            # NOTE(review): presumably [North, West, South, East] -- confirm against the CDS API docs
            'area': [
                48.2, 5.2, 45.4,
                11.02,
            ],
        },
        str(partial_file))
    partial_file.replace(my_file)

# Quick check that the downloaded files are OK

In [None]:
# load ERA5 data
# Open all daily surface files as one dataset, concatenated along 'time'.
# A glob string is expanded to a sorted file list; the file names start with
# YYYYMMDD, so name order is chronological order.
# combine='nested' is passed explicitly: recent xarray versions reject
# `concat_dim` when it is used with the default combine='by_coords'.
ds = xr.open_mfdataset(datapath + '*_era5_surface_hourly.nc', combine='nested', concat_dim='time')
print(type(ds))
# last expression of the cell: shows the dataset summary in the notebook
ds

In [None]:
# show the 'time' variable of the loaded dataset
ds['time']

In [None]:
# plot u10 at a single, arbitrarily chosen time step (fixed index 24 along 'time')
ds['u10'].isel(time=24).plot()