# Download ERA5 geopotential data
Geopotential data can be converted into elevation, which is needed for temperature lapsing. Requires use of the Copernicus Data Store (CDS) API.
CDS registration: https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome 
CDS api setup: https://cds.climate.copernicus.eu/api-how-to

In [1]:
# modules
import cdsapi    # copernicus connection
import calendar  # to find days per month
import os        # to check if file already exists
import math
from pathlib import Path
from shutil import copyfile
from datetime import datetime

#### Control file handling

In [2]:
# Easy access to control file folder
# (relative path, so it is resolved against the current working directory)
controlFolder = Path('../../0_control_files')

In [3]:
# Store the name of the 'active' file in a variable
controlFile = 'control_active.txt'

In [4]:
# Function to extract a given setting from the control file
def read_from_control( file, setting ):
    '''Return the value of a setting from a control file, as a string.

    The first line that contains `setting` and does not start with '#' is used.
    The value is the text after the first '|' on that line, with any trailing
    '#' comment and the surrounding whitespace removed.

    Parameters
      file    : path of the control file to read
      setting : name of the setting to look for

    Returns
      The setting's value as a stripped string.

    Raises
      ValueError : no uncommented line contains `setting`
      IndexError : the matching line contains no '|' separator
    '''

    # Open the control file and ...
    with open(file) as contents:
        for line in contents:

            # ... find the line with the requested setting
            if setting in line and not line.startswith('#'):
                break

        else:
            # for-else: the loop ended without a break, so nothing matched.
            # Fail loudly here rather than parse whatever line came last.
            raise ValueError("Setting '{}' not found in control file '{}'.".format(setting, file))

    # Extract the setting's value
    substring = line.split('|',1)[1]      # Remove the setting's name (split into 2 based on '|', keep only 2nd part)
    substring = substring.split('#',1)[0] # Remove comments, does nothing if no '#' is found
    substring = substring.strip()         # Remove leading and trailing whitespace, tabs, newlines

    # Return this value
    return substring

In [5]:
# Function to specify a default path
def make_default_path(suffix):
    '''Return <root_path>/domain_<domain_name>/<suffix> as a Path.

    Both 'root_path' and 'domain_name' are looked up in the active control file.
    '''

    # Location of the active control file
    activeControl = controlFolder/controlFile

    # Look up the two settings that make up the domain's base folder
    root = Path( read_from_control(activeControl,'root_path') )
    domain = read_from_control(activeControl,'domain_name')

    # Root folder, then the domain folder, then the requested sub-path
    return root / ('domain_' + domain) / suffix

#### Find where to save the data

In [6]:
# Find the path where the raw forcing needs to go
# (returned as a string; the special value 'default' is handled in the next step)
geoPath = read_from_control(controlFolder/controlFile,'forcing_geo_path')

In [7]:
# Use the default location when the control file says 'default';
# otherwise wrap the configured string so geoPath is always a Path() object
geoPath = make_default_path('forcing/0_geopotential') if geoPath == 'default' else Path(geoPath)

In [8]:
# Make the folder if it doesn't exist
# (parents=True also creates missing parent folders; exist_ok=True avoids an error if it is already there)
geoPath.mkdir(parents=True, exist_ok=True)

#### Find spatial domain from control file

In [6]:
# Find the spatial extent the data needs to cover
# (read as one '/'-separated string; converted to floats further down)
bounding_box = read_from_control(controlFolder/controlFile,'forcing_raw_space') 

In [7]:
# Function to round the coordinates of a bounding box to ERA5's 0.25 degree resolution
def round_coords_to_ERA5(coords):

    '''Snap the edges of a bounding box to the ERA5 0.25 degree grid.

    Assumes coordinates are an array: [lat_max,lon_min,lat_min,lon_max].
    Each edge is moved to the grid point whose representative area
    (0.125 degree either side of the point) contains it.

    Returns
      dl_string   : 'lat_max/lon_min/lat_min/lon_max' built from the rounded values
      rounded_lat : [lat_min, lat_max] after rounding
      rounded_lon : [lon_min, lon_max] after rounding
    '''

    def snap_lower(value):
        # Round down to the grid, then step up one cell if the value already
        # lies in the representative area of the next grid point
        snapped = math.floor(value*4)/4
        if value > snapped+0.125:
            snapped += 0.25
        return snapped

    def snap_upper(value):
        # Round up to the grid, then step down one cell if the value still
        # lies in the representative area of the previous grid point
        snapped = math.ceil(value*4)/4
        if value < snapped-0.125:
            snapped -= 0.25
        return snapped

    # Extract values (input order is lat_max, lon_min, lat_min, lon_max)
    lat_max, lon_min, lat_min, lon_max = coords[0], coords[1], coords[2], coords[3]

    # Round to ERA5 0.25 degree resolution
    rounded_lat = [snap_lower(lat_min), snap_upper(lat_max)]
    rounded_lon = [snap_lower(lon_min), snap_upper(lon_max)]

    # Make a download string
    dl_string = '{}/{}/{}/{}'.format(rounded_lat[1],rounded_lon[0],rounded_lat[0],rounded_lon[1])

    return dl_string, rounded_lat, rounded_lon

In [8]:
# Convert the '/'-separated string into a list of floats
bounding_box = [float(part) for part in bounding_box.split('/')]

In [9]:
# Find the rounded bounding box
coordinates,_,_ = round_coords_to_ERA5(bounding_box)

In [10]:
# Check what we selected
print('Starting download of [{}].'.format(coordinates))

Starting download of [51.75/-116.5/51.0/-115.5].


#### Specify a date to download
Geopotential is part of the ERA5 "invariant" data, which are constant through time.

In [11]:
# Specify an arbitrary date to download
date = '2019-01-01'

#### Download the data

In [12]:
# Specify a filename
file = geoPath / 'ERA5_geopotential.nc'

In [13]:
# if file doesn't yet exist, download the data
if not os.path.isfile(file):

    # Make sure the connection is re-tried if it fails
    retries_max = 10
    retries_cur = 1
    while retries_cur <= retries_max:
        try:

            # connect to Copernicus (requires .cdsapirc file in $HOME)
            c = cdsapi.Client()

            # specify and retrieve data
            c.retrieve('reanalysis-era5-complete', {    # do not change this!
                    'stream': 'oper',
                    'levtype': 'sf',
                    'param': '26/228007/27/28/29/30/43/74/129/160/161/162/163/172',
                    'date': date,
                    'time': '00',#/to/23/by/1',
                    'area': coordinates,
                    'grid': '0.25/0.25', # Latitude/longitude grid: east-west (longitude) and north-south resolution (latitude).
                    'format'  : 'netcdf',
                }, file)

        except Exception as err:
            # Catch Exception rather than everything, so that a keyboard/kernel
            # interrupt still stops the loop instead of starting another attempt.
            # The error itself is printed so the reason for the failure is visible.
            print('Error downloading ' + str(file) + ' on try ' + str(retries_cur) + ': ' + repr(err))
            retries_cur += 1
            continue
        else:
            # track progress (only reached when the retrieve call did not raise)
            print('Successfully downloaded ' + str(file))
            break

    else:
        # while-else: reached only when every attempt failed (no break happened)
        print('Giving up on ' + str(file) + ' after ' + str(retries_max) + ' failed tries')

2021-02-24 11:09:59,066 INFO Welcome to the CDS
2021-02-24 11:09:59,066 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2021-02-24 11:09:59,238 INFO Request is queued
2021-02-24 19:55:54,160 INFO Request is completed
2021-02-24 19:55:54,188 INFO Downloading https://download-0014.copernicus-climate.eu/cache-compute-0014/cache/data5/adaptor.mars.external-1614221671.0050914-29905-32-4b846a4e-52af-41c1-8b24-534cf0efcb8c.nc to C:\Globus endpoint\summaWorkflow_data\domain_BowAtBanff\forcing\0_geopotential\ERA5_geopotential.nc (5.1K)
2021-02-24 19:55:55,246 INFO Download rate 4.9K/s                                                                      


TypeError: can only concatenate str (not "WindowsPath") to str

#### Code provenance
Generates a basic log file in a log folder next to the downloaded data and copies this notebook there.

In [20]:
# Create the log folder inside the geopotential data folder (no error if it already exists)
logFolder = '_era5_invariants_log'
(geoPath / logFolder).mkdir(parents=True, exist_ok=True)

In [22]:
# Copy this script
# thisFile is a bare file name, so copyfile looks for it in the current working directory
thisFile = 'download_ERA5_geopotential.ipynb'
# NOTE(review): the trailing ';' presumably suppresses the notebook echo of copyfile's return value
copyfile(thisFile, geoPath / logFolder / thisFile);

In [23]:
# Get current date and time
now = datetime.now()

In [24]:
# Create a log file
# NOTE(review): the '_pressure_level' part of the name looks inherited from another
# download script (this notebook downloads geopotential) - confirm before renaming
logFile = now.strftime('%Y%m%d') + '_pressure_level_log.txt'

# The handle is deliberately not named 'file': that name holds the download target
# path, and rebinding it here would break a re-run of the download cell
with open( geoPath / logFolder / logFile, 'w') as logHandle:

    lines = ['Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\n',
             'Downloaded ERA5 geopotential data for space (lat_max, lon_min, lat_min, lon_max) [{}].'.format(coordinates)]

    # Write all log lines in one call
    logHandle.writelines(lines)