In [1]:
### imports

import cdsapi
import numpy as np
import os
import sys
from collections import OrderedDict
from os.path import join as jn
from pathlib import Path

In [2]:
### "raw_dir" is the directory where the data will be downloaded; please set it as appropriate.
### "raw" must be contained in the name of the directory since we are downloading raw data.
### Version control is also necessary: every download goes into a sub-directory named after "version".

version = "v2024-06-27"

# The following directory must exist (manually created by the user).
# This is not automated to avoid accidentally creating files in the user OS.
# NOTE(review): "corpenicus" looks like a misspelling of "copernicus"; it is kept unchanged
# here because the path has to match the directory that was created by hand - confirm.
raw_dir = os.path.expanduser("~/data/weather_data/corpenicus/ERA5/raw")

# Append the version label; os.path.join is used to match how paths are built further down.
version_dir = os.path.join(raw_dir, version)
print("Directory for specified version:", version_dir)

Directory for specified version: /fast/home/j-hu/data/weather_data/corpenicus/ERA5/raw/v2024-06-27


In [5]:
### These are the customisable input parameters:

### "variables": a list of tuples (source, variable) selecting the data you are interested in downloading
### "years_to_download": the years to be downloaded
### "months_to_download": a list of two-digit strings representing the months of the year
### "days_to_download": a list of two-digit strings representing the days of the month
### "hours_to_download": a list of "HH:MM" strings representing the hours of the day
### "area_to_download": the geographical extent of the data that we want to download, given as
###     (North, West, South, East), i.e. (lat_max, lon_min, lat_min, lon_max)


variables = [
    ("reanalysis-era5-single-levels", "100m_u_component_of_wind"),
    ("reanalysis-era5-single-levels", "100m_v_component_of_wind"),
    ("reanalysis-era5-single-levels", "surface_pressure"),
    ("reanalysis-era5-single-levels", "boundary_layer_height"),
    ("reanalysis-era5-single-levels", "2m_temperature"),
]

# The stop value of np.arange is exclusive, so this selects 2019 only.
years_to_download = np.arange(2019, 2020, 1)

# "01" ... "12"
months_to_download = [f"{month:02d}" for month in range(1, 13)]

# "01" ... "31"; all 31 days are requested for every month.
# NOTE(review): presumably the CDS ignores dates that do not exist (e.g. 31 February) - confirm.
days_to_download = [f"{day:02d}" for day in range(1, 32)]

# "00:00" ... "23:00", i.e. every full hour of the day
hours_to_download = [f"{hour:02d}:00" for hour in range(24)]

area_to_download = [
    55,  # North (lat_max)
    5,  # West (lon_min)
    45,  # South (lat_min)
    15,  # East (lon_max)
]  # area for Germany, change if necessary

In [6]:
### This cell loops over the input parameters defined in the previous cells and:
### 1) creates a "year_path" directory inside the "version_dir" directory if it is not already created
### 2) builds an "output_file" path inside the "year_path" directory and checks if it already exists
###     A) if it exists, it is assumed to be already downloaded and is skipped
###     B) if not, the data is downloaded to a temporary "<output_file>.part" file, which is renamed
###        to "output_file" only after the download has finished. An interrupted download therefore
###        never leaves a truncated file that the next run would skip as "already downloaded".

# Throw error if raw_dir does not exist
if not os.path.isdir(raw_dir):
    raise FileNotFoundError(
        f"The specified raw data directory {raw_dir} does not exist. Please create it manually."
    )

if not os.path.isdir(version_dir):
    os.mkdir(version_dir)
    print(version_dir, "was created.")

# The CDS client is created once, and only when something actually has to be downloaded,
# instead of once per file.
client = None

for year in years_to_download:

    year_path = os.path.join(version_dir, str(year))
    if not os.path.isdir(year_path):
        os.mkdir(year_path)
        print(year_path, "was created.")

    print("DOWNLOADING YEAR:", year)

    for source, variable in variables:
        file_name = f"{source}.{year}.{variable}.nc"
        output_file = os.path.join(year_path, file_name)
        print(file_name)

        if os.path.isfile(output_file):
            print("already downloaded: skipped")
            continue

        if client is None:
            client = cdsapi.Client()

        # Download next to the final location so that the rename below stays on one filesystem.
        partial_file = output_file + ".part"
        if os.path.exists(partial_file):
            # Left over from an earlier, interrupted run: start from scratch.
            os.remove(partial_file)

        client.retrieve(
            source,
            {
                "product_type": "reanalysis",
                "area": area_to_download,
                "variable": [
                    variable,
                ],
                "year": str(year),
                "month": months_to_download,
                "day": days_to_download,
                "time": hours_to_download,
                "format": "netcdf",
            },
            partial_file,
        )

        # Only a completed download is published under the final file name.
        os.replace(partial_file, output_file)

        print(" OPERATION DONE ")

    print("####################")

2024-06-27 11:42:47,515 INFO Welcome to the CDS
2024-06-27 11:42:47,516 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


/fast/home/j-hu/data/weather_data/corpenicus/ERA5/raw/v2024-06-27/2019 was created.
DOWNLOADING YEAR: 2019
reanalysis-era5-single-levels.2019.100m_u_component_of_wind.nc


2024-06-27 11:42:47,594 INFO Request is completed
2024-06-27 11:42:47,595 INFO Downloading https://download-0014-clone.copernicus-climate.eu/cache-compute-0014/cache/data4/adaptor.mars.internal-1719478940.155022-1801-6-520e24d7-661b-4b65-902a-0f2699a9dab0.nc to /fast/home/j-hu/data/weather_data/corpenicus/ERA5/raw/v2024-06-27/2019/reanalysis-era5-single-levels.2019.100m_u_component_of_wind.nc (28.1M)
2024-06-27 11:42:48,267 INFO Download rate 41.8M/s  
2024-06-27 11:42:48,337 INFO Welcome to the CDS
2024-06-27 11:42:48,337 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-06-27 11:42:48,439 INFO Request is completed
2024-06-27 11:42:48,440 INFO Downloading https://download-0013-clone.copernicus-climate.eu/cache-compute-0013/cache/data0/adaptor.mars.internal-1719479708.864158-31793-5-8deead54-f09f-4785-b75d-1def4397ca92.nc to /fast/home/j-hu/data/weather_data/corpenicus/ERA5/raw/v2024-06-27/2019/reanalysis-era5-single-levels.20

 OPERATION DONE 
reanalysis-era5-single-levels.2019.100m_v_component_of_wind.nc


2024-06-27 11:42:51,653 INFO Download rate 8.8M/s   
2024-06-27 11:42:51,725 INFO Welcome to the CDS
2024-06-27 11:42:51,726 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-06-27 11:42:51,838 INFO Request is completed
2024-06-27 11:42:51,838 INFO Downloading https://download-0006-clone.copernicus-climate.eu/cache-compute-0006/cache/data9/adaptor.mars.internal-1719480530.7508433-12178-17-715b802a-580e-4541-bbd3-91c63d75894d.nc to /fast/home/j-hu/data/weather_data/corpenicus/ERA5/raw/v2024-06-27/2019/reanalysis-era5-single-levels.2019.surface_pressure.nc (28.1M)


 OPERATION DONE 
reanalysis-era5-single-levels.2019.surface_pressure.nc


2024-06-27 11:42:53,677 INFO Download rate 15.3M/s  
2024-06-27 11:42:53,753 INFO Welcome to the CDS
2024-06-27 11:42:53,754 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-06-27 11:42:53,824 INFO Request is queued


 OPERATION DONE 
reanalysis-era5-single-levels.2019.2m_temperature.nc


2024-06-27 11:45:45,198 INFO Request is completed
2024-06-27 11:45:45,199 INFO Downloading https://download-0005-clone.copernicus-climate.eu/cache-compute-0005/cache/data6/adaptor.mars.internal-1719481474.672658-25673-9-ab13f096-5dd3-4334-948f-d8c7b19d1bfb.nc to /fast/home/j-hu/data/weather_data/corpenicus/ERA5/raw/v2024-06-27/2019/reanalysis-era5-single-levels.2019.2m_temperature.nc (28.1M)
2024-06-27 11:45:45,843 INFO Download rate 43.7M/s  


 OPERATION DONE 
####################
