# Download ERA5 surface level data
Requires use of the Copernicus Data Store (CDS) API.
CDS registration: https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome
CDS API setup: https://cds.climate.copernicus.eu/api-how-to

In [2]:
# modules
import cdsapi    # copernicus connection
import calendar  # to find days per month
import os        # to check if file already exists
import math
from pathlib import Path
from shutil import copyfile
from datetime import datetime

#### Control file handling

In [3]:
# Easy access to control file folder
# NOTE: this is a relative path, so it is resolved against the current working directory
controlFolder = Path('../../0_control_files')

In [4]:
# Store the name of the 'active' file in a variable
# (joined with controlFolder wherever a setting is read from the control file)
controlFile = 'control_active.txt'

In [5]:
# Function to extract a given setting from the control file
def read_from_control( file, setting ):
    '''Return the value of a setting from the control file, as a string.

    Control file lines are expected to look like 'name | value # optional comment'.
    The first non-comment line whose name part (the text before the first '|')
    contains `setting` is used.

    file    : path of the control file to read
    setting : name of the setting to look up

    Returns the text between the first '|' and the first '#' that follows it,
    with leading and trailing whitespace removed.

    Raises ValueError if no line in the file defines the requested setting.'''
    
    # Open the control file and ...
    with open(file) as contents:
        for line in contents:
            
            # ... skip comment lines and lines without a 'name | value' separator
            if line.lstrip().startswith('#') or '|' not in line:
                continue
            
            # ... look for the setting in the name part only, so that a mention of it
            # in another setting's value or trailing comment is not picked up by mistake
            name, value = line.split('|',1)
            if setting in name:
                value = value.split('#',1)[0] # Remove comments, does nothing if no '#' is found
                return value.strip()          # Remove leading and trailing whitespace, tabs, newlines
    
    # Fail loudly instead of returning whatever happened to be on the last line of the file
    raise ValueError("Setting '{}' not found in control file '{}'".format(setting, file))

In [6]:
# Function to specify a default path
def make_default_path(suffix):
    '''Build the default location <root_path>/domain_<domain_name>/<suffix>.

    'root_path' and 'domain_name' are looked up in the active control file
    (controlFolder/controlFile). Returns the combined location as a Path.'''
    
    # Both settings live in the same control file
    activeControl = controlFolder/controlFile
    
    # Root of the data tree, followed by the domain-specific folder name
    dataRoot = Path( read_from_control(activeControl,'root_path') )
    domainDir = 'domain_' + read_from_control(activeControl,'domain_name')
    
    return dataRoot / domainDir / suffix

#### Find where to save the data

In [6]:
# Find the path where the raw forcing needs to go
# The control file value is read as a plain string here; it is turned into a 'Path' in the
# next step to avoid issues with '/' and '\' on different operating systems
forcingPath = read_from_control(controlFolder/controlFile,'forcing_raw_path')

In [7]:
# Resolve the forcing folder: the literal value 'default' selects the standard location
# inside the domain folder, anything else is used as the path itself. Either way the
# result is a Path() object.
forcingPath = (make_default_path('forcing/1_ERA5_raw_data')
               if forcingPath == 'default'
               else Path(forcingPath))

In [8]:
# Make the folder if it doesn't exist
# parents=True also creates any missing parent folders; exist_ok=True avoids an error if it is already there
forcingPath.mkdir(parents=True, exist_ok=True)

#### Find temporal and spatial domain from control file

In [7]:
# Find which years to download
# The setting is read as one comma-separated string; it is split into integers in the next step
years = read_from_control(controlFolder/controlFile,'forcing_raw_time')

In [8]:
# Turn the comma-separated string into a list of integer years
# (the first and second entry are used as start and end year of the download)
years = [int(entry) for entry in years.split(',')]

In [9]:
# Find the spatial extent the data needs to cover
# Read as a single '/'-separated string; it is converted to a list of floats further down
bounding_box = read_from_control(controlFolder/controlFile,'forcing_raw_space') 

#### Convert bounding box coordinates to the forcing spatial grid

In [10]:
# function to round coordinates of a bounding box to ERA5s 0.25 degree resolution
def round_coords_to_ERA5(coords):
    
    '''Snap a bounding box to the ERA5 0.25 degree grid.

    coords is indexed as [lat_max, lon_min, lat_min, lon_max], i.e. the same
    North/West/South/East order as the download string that is returned.
    Every value is moved to the nearest multiple of 0.25; a value exactly halfway
    between two grid points is moved outward (minima down, maxima up).

    Returns a tuple (dl_string, rounded_lat, rounded_lon):
      dl_string   : 'lat_max/lon_min/lat_min/lon_max' built from the rounded values
      rounded_lat : [lat_min, lat_max] after rounding
      rounded_lon : [lon_min, lon_max] after rounding'''
    
    def snap_lower(value):
        # Start from the grid point at or below the value ...
        snapped = math.floor(value*4)/4
        # ... and move one cell up if the value lies in the next cell's half
        if value > snapped+0.125:
            snapped += 0.25
        return snapped
    
    def snap_upper(value):
        # Start from the grid point at or above the value ...
        snapped = math.ceil(value*4)/4
        # ... and move one cell down if the value lies in the previous cell's half
        if value < snapped-0.125:
            snapped -= 0.25
        return snapped
    
    # Pick the individual edges out of the input
    north, west, south, east = coords[0], coords[1], coords[2], coords[3]
    
    # Round to ERA5 0.25 degree resolution
    rounded_lat = [snap_lower(south), snap_upper(north)]
    rounded_lon = [snap_lower(west), snap_upper(east)]
    
    # Make a download string (North/West/South/East)
    dl_string = '{}/{}/{}/{}'.format(rounded_lat[1],rounded_lon[0],rounded_lat[0],rounded_lon[1])
    
    return dl_string, rounded_lat, rounded_lon

In [11]:
# Convert the '/'-separated string into a list of floats
bounding_box = [float(part) for part in bounding_box.split('/')]

In [12]:
# Find the rounded bounding box
# Only the download string (first return value) is kept; the rounded lat/lon lists are discarded
coordinates,_,_ = round_coords_to_ERA5(bounding_box)

In [13]:
# Check what we selected
# (shows the rounded download area and the first and last year of the requested period)
print('Starting download of [{}] for years {}-{}.'.format(coordinates,years[0],years[1]))

Starting download of [51.75/-116.5/51.0/-115.5] for years 2008-2013.


#### Download the data in monthly chunks

In [23]:
# Download one netCDF file per month for every year in the requested period.
# Months whose file already exists are skipped, so the cell can be re-run to resume.
# Start the year loop
for year in range(years[0],years[1]+1): 
    
    # Start the month loop
    for month in range (1,13): # this loops through numbers 1 to 12
       
        # find the number of days in this month
        # (monthrange returns a tuple; its second entry is the number of days)
        daysInMonth = calendar.monthrange(year,month) 
        
        # compile the date string in the required format. Append 0's to the month number if needed (zfill(2))
        date = str(year) + '-' + str(month).zfill(2) + '-01/' + \
            str(year) + '-' + str(month).zfill(2) + '-' + str(daysInMonth[1]).zfill(2) 
        
        # compile the file name string
        file = forcingPath / ('ERA5_surface_' + str(year) + str(month).zfill(2) + '.nc')

        # track progress
        print('Trying to download ' + date + ' into ' + str(file))

        # if file doesn't yet exist, download the data
        if not os.path.isfile(file):

            # Download into a temporary name and only rename to the final name on success.
            # Otherwise a failed or interrupted download could leave a partial file behind
            # that the check above would treat as complete on the next run.
            tmpFile = file.with_name(file.name + '.part')

            # Make sure the connection is re-tried if it fails
            retries_max = 10
            for retries_cur in range(1,retries_max+1):
                try:

                    # connect to Copernicus (requires .cdsapirc file in $HOME)
                    c = cdsapi.Client()

                    # specify and retrieve data
                    c.retrieve('reanalysis-era5-single-levels', { # do not change this!
                        'product_type': 'reanalysis',
                        'format': 'netcdf',
                        'variable': [
                            'mean_surface_downward_long_wave_radiation_flux',                
                            'mean_surface_downward_short_wave_radiation_flux',
                            'mean_total_precipitation_rate', 
                            'surface_pressure',
                        ],
                        'date': date,
                        'time': '00/to/23/by/1',
                        'area': coordinates, # North, West, South, East. Default: global
                        'grid': '0.25/0.25', # Latitude/longitude grid: east-west (longitude) and north-south resolution (latitude).
                    },
                    tmpFile) # temporary file path and name
                    
                    # the download is complete: give the file its final name
                    tmpFile.replace(file)
                        
                except Exception as e:
                    print('Error downloading ' + str(file) + ' on try ' + str(retries_cur))
                    print(str(e))
                    
                    # best-effort removal of whatever was written before the failure
                    try:
                        if tmpFile.exists():
                            tmpFile.unlink()
                    except OSError:
                        pass
                else:
                    # track progress
                    print('Successfully downloaded ' + str(file))
                    break
            else:
                # all tries failed: report it and carry on with the next month
                print('Giving up on ' + str(file) + ' after ' + str(retries_max) + ' failed tries')

Trying to download 1979-01-01/1979-01-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197901.nc
Trying to download 1979-02-01/1979-02-28 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197902.nc


2021-02-08 20:30:03,470 INFO Welcome to the CDS
2021-02-08 20:30:03,470 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2021-02-08 20:30:03,655 INFO Request is queued
2021-02-08 20:30:04,824 INFO Request is running
2021-02-08 20:34:24,749 INFO Request is completed
2021-02-08 20:34:24,750 INFO Downloading https://download-0004.copernicus-climate.eu/cache-compute-0004/cache/data5/adaptor.mars.internal-1612841403.7407765-19743-7-ff673673-bdec-4749-a35d-0a83ad617707.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197902.nc (162K)
2021-02-08 20:34:26,107 INFO Download rate 119.4K/s                                                                    
2021-02-08 20:34:26,448 INFO Welcome to the CDS
2021-02-08 20:34:26,449 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197902.nc
Trying to download 1979-03-01/1979-03-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197903.nc


2021-02-08 20:34:26,630 INFO Request is queued
2021-02-08 20:34:27,793 INFO Request is running
2021-02-08 20:38:46,632 INFO Request is completed
2021-02-08 20:38:46,633 INFO Downloading https://download-0007.copernicus-climate.eu/cache-compute-0007/cache/data6/adaptor.mars.internal-1612841666.7446716-1491-31-30a15bc1-2f83-4f23-9d68-813166b3a6b4.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197903.nc (179.2K)
2021-02-08 20:38:47,932 INFO Download rate 138K/s                                                                      
2021-02-08 20:38:48,272 INFO Welcome to the CDS
2021-02-08 20:38:48,272 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197903.nc
Trying to download 1979-04-01/1979-04-30 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197904.nc


2021-02-08 20:38:48,515 INFO Request is queued
2021-02-08 20:38:49,709 INFO Request is running
2021-02-08 20:43:08,561 INFO Request is completed
2021-02-08 20:43:08,562 INFO Downloading https://download-0002.copernicus-climate.eu/cache-compute-0002/cache/data2/adaptor.mars.internal-1612841928.7930326-31274-33-505b3ae2-65ba-4037-bc13-3990e61a5e65.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197904.nc (173.5K)
2021-02-08 20:43:11,546 INFO Download rate 58.2K/s                                                                     
2021-02-08 20:43:11,942 INFO Welcome to the CDS
2021-02-08 20:43:11,943 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197904.nc
Trying to download 1979-05-01/1979-05-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197905.nc


2021-02-08 20:43:12,130 INFO Request is queued
2021-02-08 20:43:13,294 INFO Request is running
2021-02-08 20:47:32,153 INFO Request is completed
2021-02-08 20:47:32,154 INFO Downloading https://download-0002.copernicus-climate.eu/cache-compute-0002/cache/data0/adaptor.mars.internal-1612842192.3640003-7815-19-db30b55c-b696-4db8-a4da-ff553c58fe78.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197905.nc (179.2K)
2021-02-08 20:47:34,297 INFO Download rate 83.7K/s                                                                     
2021-02-08 20:47:34,658 INFO Welcome to the CDS
2021-02-08 20:47:34,659 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197905.nc
Trying to download 1979-06-01/1979-06-30 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197906.nc


2021-02-08 20:47:34,864 INFO Request is queued
2021-02-08 20:47:36,032 INFO Request is running
2021-02-08 20:51:54,907 INFO Request is completed
2021-02-08 20:51:54,908 INFO Downloading https://download-0008.copernicus-climate.eu/cache-compute-0008/cache/data2/adaptor.mars.internal-1612842455.3546946-418-11-55529753-7537-431d-ae58-29114ea6c50f.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197906.nc (173.5K)
2021-02-08 20:51:56,253 INFO Download rate 128.9K/s                                                                    
2021-02-08 20:51:56,579 INFO Welcome to the CDS
2021-02-08 20:51:56,580 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197906.nc
Trying to download 1979-07-01/1979-07-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197907.nc


2021-02-08 20:51:56,769 INFO Request is queued
2021-02-08 20:51:57,932 INFO Request is running
2021-02-08 20:56:16,737 INFO Request is completed
2021-02-08 20:56:16,738 INFO Downloading https://download-0008.copernicus-climate.eu/cache-compute-0008/cache/data9/adaptor.mars.internal-1612842716.8406177-23168-37-5897295e-79e5-4791-bd09-5ae2432bca4f.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197907.nc (179.2K)
2021-02-08 20:56:19,868 INFO Download rate 57.3K/s                                                                     
2021-02-08 20:56:20,185 INFO Welcome to the CDS
2021-02-08 20:56:20,186 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197907.nc
Trying to download 1979-08-01/1979-08-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197908.nc


2021-02-08 20:56:20,364 INFO Request is queued
2021-02-08 20:56:21,519 INFO Request is running
2021-02-08 21:00:40,308 INFO Request is completed
2021-02-08 21:00:40,309 INFO Downloading https://download-0005.copernicus-climate.eu/cache-compute-0005/cache/data1/adaptor.mars.internal-1612842980.405912-4481-11-7f4053a1-6e92-454a-be28-9484e2b2b492.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197908.nc (179.2K)
2021-02-08 21:00:41,610 INFO Download rate 137.9K/s                                                                    
2021-02-08 21:00:41,953 INFO Welcome to the CDS
2021-02-08 21:00:41,954 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197908.nc
Trying to download 1979-09-01/1979-09-30 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197909.nc


2021-02-08 21:00:42,144 INFO Request is queued
2021-02-08 21:00:43,310 INFO Request is running
2021-02-08 21:05:02,158 INFO Request is completed
2021-02-08 21:05:02,159 INFO Downloading https://download-0009.copernicus-climate.eu/cache-compute-0009/cache/data8/adaptor.mars.internal-1612843242.2507327-11020-5-a95dd5c0-15f3-44cf-ae50-bab3cdfa99ce.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197909.nc (173.5K)
2021-02-08 21:05:03,391 INFO Download rate 140.8K/s                                                                    
2021-02-08 21:05:03,693 INFO Welcome to the CDS
2021-02-08 21:05:03,694 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197909.nc
Trying to download 1979-10-01/1979-10-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197910.nc


2021-02-08 21:05:03,920 INFO Request is queued
2021-02-08 21:05:05,074 INFO Request is running
2021-02-08 21:09:23,854 INFO Request is completed
2021-02-08 21:09:23,855 INFO Downloading https://download-0010.copernicus-climate.eu/cache-compute-0010/cache/data7/adaptor.mars.internal-1612843504.0099413-21120-27-6d06baea-35a8-4f8c-af40-e3be271bee0c.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197910.nc (179.2K)
2021-02-08 21:09:25,439 INFO Download rate 113.2K/s                                                                    
2021-02-08 21:09:25,805 INFO Welcome to the CDS
2021-02-08 21:09:25,806 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197910.nc
Trying to download 1979-11-01/1979-11-30 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197911.nc


2021-02-08 21:09:26,049 INFO Request is queued
2021-02-08 21:09:27,206 INFO Request is running
2021-02-08 21:13:45,997 INFO Request is completed
2021-02-08 21:13:45,998 INFO Downloading https://download-0010.copernicus-climate.eu/cache-compute-0010/cache/data9/adaptor.mars.internal-1612843766.2572463-14632-19-97c952dc-d302-4e1c-8707-f4767d04d712.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197911.nc (173.5K)
2021-02-08 21:13:47,295 INFO Download rate 133.7K/s                                                                    
2021-02-08 21:13:47,646 INFO Welcome to the CDS
2021-02-08 21:13:47,647 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197911.nc
Trying to download 1979-12-01/1979-12-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197912.nc


2021-02-08 21:13:47,850 INFO Request is queued
2021-02-08 21:13:49,010 INFO Request is running
2021-02-08 21:18:07,789 INFO Request is completed
2021-02-08 21:18:07,789 INFO Downloading https://download-0000.copernicus-climate.eu/cache-compute-0000/cache/data1/adaptor.mars.internal-1612844028.1393278-6028-23-7b8bd343-b89d-4d9e-bc63-4df6ea0585a6.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197912.nc (179.2K)
2021-02-08 21:18:09,125 INFO Download rate 134.2K/s                                                                    


Successfully downloaded C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_surface_197912.nc


#### Code provenance
Generates a basic log file in a `_workflow_log` subfolder of the raw forcing folder and copies this notebook there.

In [13]:
# Create the log folder inside the raw forcing folder (including missing parents; fine if it already exists)
logFolder = '_workflow_log'
(forcingPath / logFolder).mkdir(parents=True, exist_ok=True)

In [14]:
# Copy this script
# NOTE: the notebook name is hard-coded and given without a folder, so it is looked up in the
# current working directory; the copy keeps the same file name inside the log folder
thisFile = 'download_ERA5_surfaceLevel_annual.ipynb'
copyfile(thisFile, forcingPath / logFolder / thisFile);

In [15]:
# Get current date and time
# (no timezone is passed, so this is local time without timezone information)
now = datetime.now()

In [16]:
# Create a log file 
# The file name starts with today's date (YYYYMMDD); mode 'w' overwrites a log written earlier on the same day
logFile = now.strftime('%Y%m%d') + '_surface_level_log.txt'
with open( forcingPath / logFolder / logFile, 'w') as file:
    
    # First line: which script made the log and when. Second line: the downloaded area and period.
    # Only the first line ends with a newline, so the file ends without a trailing newline.
    lines = ['Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\n',
             'Downloaded ERA5 surface level data for space (lat_max, lon_min, lat_min, lon_max) [{}] for time Jan-{} / Dec-{}.'.format(coordinates,years[0],years[1])]
    for txt in lines:
        file.write(txt)  