# Download ERA5 pressure level data

In [18]:
# modules
import cdsapi    # copernicus connection
import calendar  # to find days per month
import os        # to check if file already exists
import math
from pathlib import Path
from shutil import copyfile
from datetime import datetime

#### Control file handling

In [2]:
# Easy access to control file folder.
# This is a relative path, so it is resolved against the working directory the notebook runs in.
controlFolder = Path('../../0_control_files')

In [3]:
# Store the name of the 'active' control file in a variable.
# It is joined with controlFolder whenever a setting is read.
controlFile = 'control_active.txt'

In [4]:
# Function to extract a given setting from the control file
def read_from_control( file, setting ):
    '''Extract the value of a given setting from the control file.

    The file is scanned from the top and the first line that contains
    `setting` anywhere in its text is used. The value is whatever sits between
    the first '|' and the first following '#' on that line, with surrounding
    whitespace removed.

    Parameters
    ----------
    file : path of the control file to read.
    setting : text identifying the line of the requested setting.

    Returns
    -------
    The setting's value as a string.

    Raises
    ------
    ValueError : if no line in the file contains `setting`.
    IndexError : if the matching line does not contain a '|' separator.
    '''

    # Open the control file and ...
    with open(file) as contents:
        for line in contents:

            # ... find the line with the requested setting
            if setting in line:
                break
        else:
            # for-else: only reached when the loop finished without 'break'. Without this
            # check the value of the file's last line would be returned by mistake.
            raise ValueError("Setting '{}' not found in control file '{}'".format(setting, file))

    # Extract the setting's value
    substring = line.split('|',1)[1]      # Remove the setting's name (split into 2 based on '|', keep only 2nd part)
    substring = substring.split('#',1)[0] # Remove comments, does nothing if no '#' is found
    substring = substring.strip()         # Remove leading and trailing whitespace, tabs, newlines

    # Return this value
    return substring

In [5]:
# Function to specify a default path
def make_default_path(suffix):
    '''Build the default location <root_path>/domain_<domain_name>/<suffix>.

    Both 'root_path' and 'domain_name' are looked up in the active control file.
    Returns a Path object.'''

    # Both settings come from the same control file
    activeControl = controlFolder/controlFile

    # Root of the data tree, followed by the name of the domain
    root = Path( read_from_control(activeControl,'root_path') )
    domain = read_from_control(activeControl,'domain_name')

    # Domain folders carry a 'domain_' prefix
    return root / ('domain_' + domain) / suffix

#### Find where to save the data

In [6]:
# Find the path where the raw forcing needs to go.
# At this point the value is still a string and may be the keyword 'default'.
forcingPath = read_from_control(controlFolder/controlFile,'forcing_raw_path')

In [7]:
# Resolve the keyword 'default' to the standard location; anything else is
# taken as a user-specified path and wrapped in Path() so it is a Path() object
forcingPath = (
    make_default_path('forcing/1_ERA5_raw_data')
    if forcingPath == 'default'
    else Path(forcingPath)
)

In [8]:
# Make the folder if it doesn't exist.
# parents=True also creates missing parent folders; exist_ok=True avoids an error if the folder is already there.
forcingPath.mkdir(parents=True, exist_ok=True)

#### Find temporal and spatial domain from control file

In [9]:
# Find which years to download.
# The value is read as one comma-separated string and is converted to integers further down.
years = read_from_control(controlFolder/controlFile,'forcing_raw_time')

In [10]:
# Split the comma-separated string into 2 integers.
# The type check keeps this cell re-runnable: once `years` holds the list of integers,
# running the cell again leaves it untouched instead of failing on the missing .split().
if isinstance(years, str):
    years = [int(year) for year in years.split(',')]

In [45]:
# Find the spatial extent the data needs to cover.
# The value is read as one '/'-separated string and is converted to floats further down.
bounding_box = read_from_control(controlFolder/controlFile,'forcing_raw_space') 

#### Convert bounding box coordinates to the forcing spatial grid

In [36]:
# function to round coordinates of a bounding box to ERA5s 0.25 degree resolution
def round_coords_to_ERA5(coords):

    '''Round the coordinates of a bounding box to ERA5's 0.25 degree resolution.

    Assumes coordinates are an array: [lat_max,lon_min,lat_min,lon_max].

    Returns three values:
      - a download string 'lat_max/lon_min/lat_min/lon_max' built from the rounded values,
      - the rounded latitudes as [lat_min,lat_max],
      - the rounded longitudes as [lon_min,lon_max].'''

    # Extract values as [min,max] pairs
    lon = [coords[1],coords[3]]
    lat = [coords[2],coords[0]]

    # Round outward to ERA5 0.25 degree resolution: minima down, maxima up
    rounded_lon = [math.floor(lon[0]*4)/4, math.ceil(lon[1]*4)/4]
    rounded_lat = [math.floor(lat[0]*4)/4, math.ceil(lat[1]*4)/4]

    # Find if we are still in the representative area of a different ERA5 grid cell.
    # If the original value lies more than half a cell (0.125 degree) inside the
    # outward-rounded value, move one grid step (0.25 degree) back towards it.
    if lat[0] > rounded_lat[0]+0.125:
        rounded_lat[0] += 0.25
    if lon[0] > rounded_lon[0]+0.125:
        rounded_lon[0] += 0.25
    if lat[1] < rounded_lat[1]-0.125:
        rounded_lat[1] -= 0.25
    if lon[1] < rounded_lon[1]-0.125:
        rounded_lon[1] -= 0.25

    # Make a download string in the order lat_max/lon_min/lat_min/lon_max
    dl_string = '{}/{}/{}/{}'.format(rounded_lat[1],rounded_lon[0],rounded_lat[0],rounded_lon[1])

    return dl_string, rounded_lat, rounded_lon

In [32]:
# Convert the '/'-separated string to an array of floats.
# The type check keeps this cell re-runnable: once `bounding_box` holds the list of floats,
# running the cell again leaves it untouched instead of failing on the missing .split().
if isinstance(bounding_box, str):
    bounding_box = [float(value) for value in bounding_box.split('/')]

In [37]:
# Find the rounded bounding box.
# Only the download string is kept; the separate rounded lat and lon vectors are discarded.
coordinates,_,_ = round_coords_to_ERA5(bounding_box)

In [44]:
# Check what we selected: the rounded bounding box plus the first and last year to download
print('Starting download of [{}] for years {}-{}.'.format(coordinates,years[0],years[1]))

Starting download of [51.75/-116.5/51.0/-115.5] for years 2008-2013.


#### Download the data in monthly chunks

In [14]:
# Download one NetCDF file per month for every requested year.
# Files that already exist are skipped, so this cell can be re-run after an interruption.
# NOTE(review): a file left behind by a failed or interrupted transfer would presumably
# also be skipped on a re-run - verify, and delete such files by hand if needed.

# Maximum number of download attempts per monthly file
retries_max = 10

# Start the year loop
for year in range(years[0],years[1]+1):

    # Start the month loop
    for month in range(1,13): # this loops through numbers 1 to 12

        # find the number of days in this month; monthrange returns (weekday of first day, number of days)
        daysInMonth = calendar.monthrange(year,month)

        # compile the date string in the required format. Pad month and day numbers with leading 0's if needed (zfill(2))
        date = str(year) + '-' + str(month).zfill(2) + '-01/to/' + \
            str(year) + '-' + str(month).zfill(2) + '-' + str(daysInMonth[1]).zfill(2)

        # compile the file name string
        file = forcingPath / ('ERA5_pressureLevel137_' + str(year) + str(month).zfill(2) + '.nc')

        # track progress
        print('Trying to download ' + date + ' into ' + str(file))

        # if file doesn't yet exist, download the data
        if not os.path.isfile(file):

            # Make sure the connection is re-tried if it fails
            retries_cur = 1
            while retries_cur <= retries_max:
                try:

                    # connect to Copernicus (requires .cdsapirc file in $HOME)
                    c = cdsapi.Client()

                    # specify and retrieve data
                    c.retrieve('reanalysis-era5-complete', {    # do not change this!
                        'class': 'ea',
                        'expver': '1',
                        'stream': 'oper',
                        'type': 'an',
                        'levtype': 'ml',
                        'levelist': '137',
                        'param': '130/131/132/133',
                        'date': date,
                        'time': '00/to/23/by/1',
                        'area': coordinates,
                        'grid': '0.25/0.25', # Latitude/longitude grid: east-west (longitude) and north-south resolution (latitude).
                        'format'  : 'netcdf',
                    }, file)

                except Exception as e:
                    # report the failed attempt and go around for another try
                    print('Error downloading ' + str(file) + ' on try ' + str(retries_cur))
                    print(str(e))
                    retries_cur += 1
                else:
                    # track progress and stop retrying
                    print('Successfully downloaded ' + str(file))
                    break
            else:
                # while-else: only reached when every attempt failed (the loop ended without 'break').
                # Report it so a missing month does not go unnoticed; the loop then moves on to the next month.
                print('Giving up on ' + str(file) + ' after ' + str(retries_max) + ' failed tries')

Trying to download 1979-01-01/to/1979-01-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_pressureLevel137_197901.nc
Trying to download 1979-02-01/to/1979-02-28 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_pressureLevel137_197902.nc
Trying to download 1979-03-01/to/1979-03-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_pressureLevel137_197903.nc
Trying to download 1979-04-01/to/1979-04-30 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_pressureLevel137_197904.nc
Trying to download 1979-05-01/to/1979-05-31 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_pressureLevel137_197905.nc
Trying to download 1979-06-01/to/1979-06-30 into C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\1_ERA5_raw_data\ERA5_pressureLevel137_197906.nc
Trying to download 1979-07-01/to/1

#### Code provenance
Generates a basic log file in a `_workflow_log` folder inside the raw forcing data folder and copies this notebook there. (The control file itself is not copied by the cells below.)

In [9]:
# Name of the log folder; it is created inside the raw forcing folder,
# together with any missing parents, and left alone if it already exists
logFolder = '_workflow_log'
(forcingPath / logFolder).mkdir(parents=True, exist_ok=True)

In [13]:
# Copy this script into the log folder.
# The notebook's file name is hard-coded and is looked up relative to the current working directory.
thisFile = 'download_ERA5_pressureLevel_annual.ipynb'
copyfile(thisFile, forcingPath / logFolder / thisFile); # trailing ';' suppresses the cell output

In [14]:
# Get current date and time; used for the log file's name and its first line
now = datetime.now()

In [23]:
# Create a log file named after today's date. It holds two lines:
# who generated the log and when, followed by what was downloaded.
logFile = now.strftime('%Y%m%d') + '_pressure_level_log.txt'
with open( forcingPath / logFolder / logFile, 'w') as file:

    # Only the first entry ends in a newline, so the file has no trailing newline
    lines = [
        'Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\n',
        'Downloaded ERA5 pressure level data for space (lat_max, lon_min, lat_min, lon_max) [{}] for time Jan-{} / Dec-{}.'.format(coordinates,years[0],years[1]),
    ]
    file.writelines(lines)