# Download forcing
Downloads ERA5 forcing data. Time period to download is based on availability of both flow and forcing data.
- Flow: catchment-dependent
- ERA5: 1940-current
- EM-Earth: 1950-2019


We want to extract data on a per-month basis, particularly for MARS request (i.e. tape archive) because this maximizes the amount of information we extract from a single tape. This prevents unnecessary tape switching. This hopefully strikes a good balance between needs.

See:
- https://confluence.ecmwf.int/display/UDOC/Retrieve#Retrieve-Datacollocation

In [1]:
import sys
from datetime import datetime, timedelta
import pandas as pd
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Specify where the config file can be found
config_file = '../0_config/config.txt'

In [3]:
# Get the required info from the config file
data_path = cs.read_from_config(config_file,'data_path')

# CAMELS-spat metadata
cs_meta_path = cs.read_from_config(config_file,'cs_basin_path')
cs_meta_name = cs.read_from_config(config_file,'cs_meta_name')
cs_unusable_name = cs.read_from_config(config_file,'cs_unusable_name')

# Basin folder
cs_basin_folder = cs.read_from_config(config_file, 'cs_basin_path')
basins_path = Path(data_path) / cs_basin_folder

### Data loading

In [4]:
# CAMELS-spat metadata file
cs_meta_path = Path(data_path) / cs_meta_path
cs_meta = pd.read_csv(cs_meta_path / cs_meta_name)

In [5]:
# Open list of unusable stations; Enforce reading IDs as string to keep leading 0's
cs_unusable = pd.read_csv(cs_meta_path / cs_unusable_name, dtype={'Station_id': object}) 

## Processing

In [None]:
# Per basin:
# 1. Determine download coordinates (code from cwarhm)
# 2. Determine download period (from cs_meta)
# 3. Download ERA5 
# 4. Download deterministic EM-Earth

In [83]:
for ix,row in cs_meta.iterrows():

    # TESTING: run 1 station only
    if ix != 0: continue 
    
    # Get shapefile path to determine download coordinates, and forcing destination path
    basin_id, shp_lump_path, _, _, _ = cs.prepare_delineation_outputs(cs_meta, ix, Path(data_path)/cs_basin_folder)
    raw_fold, _, _ = prepare_forcing_outputs(cs_meta, ix, Path(data_path)/cs_basin_folder) # Returns folders only, not file names
    print('--- Now running basin {}. {}'.format(ix, basin_id))
    
    # From shapefile, get bounding coordinates. Then determine download coordinates from those
    bounds = find_shapefile_bounds(shp_lump_path)
    coords_era5, _, _ = find_download_coords_from_bounds(bounds, target='ERA5')
    
    # Check if we need to run downloads for this station at all
    missing = flow_obs_unavailable(cs_unusable, row.Country, row.Station_id)
    if 'iv' in missing and 'dv' in missing: 
        continue # with next station, because we have no observations at all for this station

    # From meta-data, get download period
    times_flow = find_flow_obs_times_from_metadata(row, missing)
    times_era5 = round_flow_obs_to_days(times_flow)
    start_date = datetime.strptime(times_era5[0], '%Y-%m-%d')
    final_date = datetime.strptime(times_era5[1], '%Y-%m-%d')
    
    print(f'    Basin coordinates:         {bounds}')
    print(f'    ERA5 download coordinates: [{coords_era5[0]}]')
    print(f'    Flow obs unavailable:      {missing}')
    print(f'    Download times:            {times_era5}')

    # Convert start and end dates into two lists of start and end dates, that we'll iterate over
    final_date = final_date.replace(year=1950) # TESTING
    start_list,end_list = convert_start_and_end_dates_to_era5_download_lists(start_date,final_date)
    
    # Download data into destination folder
    download_era5_time_invariant_data_to_netcdf(coords_era5, raw_fold/'ERA5_2023-01-01_invariants.nc')

    for start,end in zip(start_list,end_list):

        # Convert to relevant
        yyyy_mm = start.strftime('%Y-%m') # filename
        start   = start.strftime('%Y-%m-%d') # yyyy-mm-dd for use with cdsapi
        end     = end.strftime('%Y-%m-%d')

        # Get the data
        download_era5_surface_level_data_to_netcdf(coords_era5, start, end, raw_fold/f'ERA5_{yyyy_mm}_surface_variables.nc')
        download_era5_pressure_level_data_to_netcdf(coords_era5, start, end, raw_fold/f'ERA5_{yyyy_mm}_pressure_variables.nc')

    print('--- Downloads for basin {} complete.'.format(ix, basin_id))

Now running basin 0. CAN_01AD002
    Basin coordinates:         [-70.43208333  45.98541667 -68.07125     47.83791667]
    ERA5 download coordinates: [4]
    Flow obs unavailable:      ['iv']
    Download times:            ['1950-01-01', '2020-12-31']


2023-08-20 17:30:11,641 INFO Welcome to the CDS
2023-08-20 17:30:11,641 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 17:30:11,863 INFO Request is queued
2023-08-20 17:30:13,077 INFO Request is running
2023-08-20 17:34:32,804 INFO Request is completed
2023-08-20 17:34:32,804 INFO Downloading https://download-0006-clone.copernicus-climate.eu/cache-compute-0006/cache/data8/adaptor.mars.internal-1692574456.6285071-22756-2-d2b6cef2-d718-4ade-a8f1-6a6e736be074.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-01_surface_variables.nc (900.8K)
2023-08-20 17:34:35,147 INFO Download rate 384.4K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-01_surface_variables.nc


2023-08-20 17:34:35,555 INFO Welcome to the CDS
2023-08-20 17:34:35,555 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 17:34:35,816 INFO Request is queued
2023-08-20 17:34:37,206 INFO Request is running
2023-08-20 17:38:56,928 INFO Request is completed
2023-08-20 17:38:56,929 INFO Downloading https://download-0008-clone.copernicus-climate.eu/cache-compute-0008/cache/data7/adaptor.mars.external-1692574640.5989888-7456-4-997c4416-87d1-4188-94fc-bdb639589698.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-01_pressure_variables.nc (516.4K)
2023-08-20 17:38:58,990 INFO Download rate 250.6K/s
2023-08-20 17:38:59,425 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-01_pressure_variables.nc


2023-08-20 17:38:59,426 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 17:38:59,649 INFO Request is queued
2023-08-20 17:39:00,906 INFO Request is running
2023-08-20 17:43:20,406 INFO Request is completed
2023-08-20 17:43:20,406 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.internal-1692574969.1671119-29204-1-23fb0dfa-a7db-4d43-a25c-a226d5bcc6ec.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-02_surface_variables.nc (813.9K)
2023-08-20 17:43:22,876 INFO Download rate 329.5K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-02_surface_variables.nc


2023-08-20 17:43:23,291 INFO Welcome to the CDS
2023-08-20 17:43:23,291 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 17:43:23,527 INFO Request is queued
2023-08-20 17:43:24,742 INFO Request is running
2023-08-20 17:46:16,903 INFO Request is completed
2023-08-20 17:46:16,904 INFO Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data2/adaptor.mars.external-1692575152.581001-26054-4-7306563f-0dc8-4ff3-a1aa-c2dbe838e284.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-02_pressure_variables.nc (466.6K)
2023-08-20 17:46:19,129 INFO Download rate 209.8K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-02_pressure_variables.nc


2023-08-20 17:46:19,538 INFO Welcome to the CDS
2023-08-20 17:46:19,540 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 17:46:19,813 INFO Request is queued
2023-08-20 17:46:21,286 INFO Request is running
2023-08-20 17:50:41,187 INFO Request is completed
2023-08-20 17:50:41,187 INFO Downloading https://download-0007-clone.copernicus-climate.eu/cache-compute-0007/cache/data5/adaptor.mars.internal-1692575426.908268-21961-10-b7423664-ef8b-431e-b4b7-e583e851f8fc.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-03_surface_variables.nc (900.8K)
2023-08-20 17:50:43,427 INFO Download rate 402.1K/s
2023-08-20 17:50:43,827 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-03_surface_variables.nc


2023-08-20 17:50:43,827 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 17:50:44,077 INFO Request is queued
2023-08-20 17:50:45,287 INFO Request is running
2023-08-20 17:53:37,497 INFO Request is completed
2023-08-20 17:53:37,506 INFO Downloading https://download-0013-clone.copernicus-climate.eu/cache-compute-0013/cache/data5/adaptor.mars.external-1692575602.8745754-9774-6-5484edeb-3b29-499a-9290-783e0777ac77.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-03_pressure_variables.nc (516.4K)
2023-08-20 17:53:39,676 INFO Download rate 238K/s  
2023-08-20 17:53:40,086 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-03_pressure_variables.nc


2023-08-20 17:53:40,086 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 17:53:40,359 INFO Request is queued
2023-08-20 17:53:41,617 INFO Request is running
2023-08-20 17:58:01,337 INFO Request is completed
2023-08-20 17:58:01,337 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data8/adaptor.mars.internal-1692575854.5665119-1687-3-263c57c1-3ccb-4b60-9d4c-a93569d062b4.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-04_surface_variables.nc (871.8K)
2023-08-20 17:58:03,537 INFO Download rate 396.4K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-04_surface_variables.nc


2023-08-20 17:58:04,037 INFO Welcome to the CDS
2023-08-20 17:58:04,037 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 17:58:04,317 INFO Request is queued
2023-08-20 17:58:05,551 INFO Request is running
2023-08-20 18:00:57,791 INFO Request is completed
2023-08-20 18:00:57,791 INFO Downloading https://download-0016.copernicus-climate.eu/cache-compute-0016/cache/data0/adaptor.mars.external-1692576038.0140517-21046-13-8972df38-e8bb-46ee-b3d3-5c2d0fec4ec7.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-04_pressure_variables.nc (499.8K)
2023-08-20 18:00:59,881 INFO Download rate 239.2K/s
2023-08-20 18:01:00,291 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-04_pressure_variables.nc


2023-08-20 18:01:00,291 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:01:00,552 INFO Request is queued
2023-08-20 18:01:01,781 INFO Request is running
2023-08-20 18:05:21,792 INFO Request is completed
2023-08-20 18:05:21,793 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data5/adaptor.mars.internal-1692576310.1413639-30915-17-393b4146-9fb8-47b7-85c2-e6a69068a20c.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-05_surface_variables.nc (900.8K)
2023-08-20 18:05:24,296 INFO Download rate 360K/s  


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-05_surface_variables.nc


2023-08-20 18:05:24,716 INFO Welcome to the CDS
2023-08-20 18:05:24,717 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:05:24,935 INFO Request is queued
2023-08-20 18:05:26,152 INFO Request is running
2023-08-20 18:09:45,842 INFO Request is completed
2023-08-20 18:09:45,842 INFO Downloading https://download-0013-clone.copernicus-climate.eu/cache-compute-0013/cache/data7/adaptor.mars.external-1692576486.7515442-16239-5-99174051-76d5-4853-a6b8-fdfa35e2fcb9.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-05_pressure_variables.nc (516.4K)
2023-08-20 18:09:48,071 INFO Download rate 231.6K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-05_pressure_variables.nc


2023-08-20 18:09:48,561 INFO Welcome to the CDS
2023-08-20 18:09:48,561 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:09:48,893 INFO Request is queued
2023-08-20 18:09:50,692 INFO Request is running
2023-08-20 18:14:11,099 INFO Request is completed
2023-08-20 18:14:11,100 INFO Downloading https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data8/adaptor.mars.internal-1692576825.1496243-23391-19-b4fdf8c3-e1b7-4d58-83fe-5de51c2f9fcd.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-06_surface_variables.nc (871.8K)
2023-08-20 18:14:13,404 INFO Download rate 378.5K/s
2023-08-20 18:14:13,814 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-06_surface_variables.nc


2023-08-20 18:14:13,815 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:14:14,094 INFO Request is queued
2023-08-20 18:14:15,464 INFO Request is running
2023-08-20 18:18:35,012 INFO Request is completed
2023-08-20 18:18:35,013 INFO Downloading https://download-0017.copernicus-climate.eu/cache-compute-0017/cache/data4/adaptor.mars.external-1692577020.2045977-12487-16-470a392a-ac10-4757-9262-99bd252337e5.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-06_pressure_variables.nc (499.8K)
2023-08-20 18:18:38,379 INFO Download rate 148.5K/s
2023-08-20 18:18:38,796 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-06_pressure_variables.nc


2023-08-20 18:18:38,797 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:18:39,068 INFO Request is queued
2023-08-20 18:18:40,276 INFO Request is running
2023-08-20 18:23:00,255 INFO Request is completed
2023-08-20 18:23:00,255 INFO Downloading https://download-0010-clone.copernicus-climate.eu/cache-compute-0010/cache/data7/adaptor.mars.internal-1692577367.778073-29453-19-09a9cc05-0e99-4257-b052-7efa3c2d92b5.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-07_surface_variables.nc (900.8K)
2023-08-20 18:23:04,715 INFO Download rate 202K/s  


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-07_surface_variables.nc


2023-08-20 18:23:05,246 INFO Welcome to the CDS
2023-08-20 18:23:05,246 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:23:05,517 INFO Request is queued
2023-08-20 18:23:06,818 INFO Request is running
2023-08-20 18:25:59,704 INFO Request is completed
2023-08-20 18:25:59,704 INFO Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data5/adaptor.mars.external-1692577546.7366464-7893-8-db6f6a90-b9b6-4aa8-82ce-eedb169a6588.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-07_pressure_variables.nc (516.4K)
2023-08-20 18:26:01,978 INFO Download rate 227.3K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-07_pressure_variables.nc


2023-08-20 18:26:02,496 INFO Welcome to the CDS
2023-08-20 18:26:02,496 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:26:02,736 INFO Request is queued
2023-08-20 18:26:04,178 INFO Request is running
2023-08-20 18:32:26,855 INFO Request is completed
2023-08-20 18:32:26,856 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data6/adaptor.mars.internal-1692577813.5707986-10798-13-ec9bd662-ff80-419f-8038-23f5402fef6e.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-08_surface_variables.nc (900.8K)
2023-08-20 18:32:29,740 INFO Download rate 312.3K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-08_surface_variables.nc


2023-08-20 18:32:30,211 INFO Welcome to the CDS
2023-08-20 18:32:30,211 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:32:30,511 INFO Request is queued
2023-08-20 18:32:31,831 INFO Request is running
2023-08-20 18:36:52,347 INFO Request is completed
2023-08-20 18:36:52,347 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data9/adaptor.mars.external-1692578118.0630608-15704-13-e1b2b5d9-9e25-4b41-8494-dad52b7e954e.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-08_pressure_variables.nc (516.4K)
2023-08-20 18:37:13,120 INFO Download rate 24.9K/s 


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-08_pressure_variables.nc


2023-08-20 18:37:13,883 INFO Welcome to the CDS
2023-08-20 18:37:13,883 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:37:14,255 INFO Request is queued
2023-08-20 18:37:15,461 INFO Request is running
2023-08-20 18:41:34,922 INFO Request is completed
2023-08-20 18:41:34,923 INFO Downloading https://download-0021.copernicus-climate.eu/cache-compute-0021/cache/data5/adaptor.mars.internal-1692578476.4790502-27780-4-0d46f5be-6aec-4e5b-8730-3bc5afb6a693.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-09_surface_variables.nc (871.8K)
2023-08-20 18:42:19,806 INFO Download rate 19.4K/s 
2023-08-20 18:42:20,219 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-09_surface_variables.nc


2023-08-20 18:42:20,220 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:42:20,449 INFO Request is queued
2023-08-20 18:42:21,665 INFO Request is running
2023-08-20 18:45:14,056 INFO Request is completed
2023-08-20 18:45:14,057 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1692578693.4796128-28130-1-d0179c7d-25a7-4d49-8b44-792adbcdf633.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-09_pressure_variables.nc (499.8K)
2023-08-20 18:45:16,263 INFO Download rate 226.6K/s
2023-08-20 18:45:16,668 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-09_pressure_variables.nc


2023-08-20 18:45:16,669 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:45:16,894 INFO Request is queued
2023-08-20 18:45:18,123 INFO Request is running
2023-08-20 18:49:37,965 INFO Request is completed
2023-08-20 18:49:37,966 INFO Downloading https://download-0001-clone.copernicus-climate.eu/cache-compute-0001/cache/data9/adaptor.mars.internal-1692578965.2353237-19550-4-cc17d9d4-df07-4ab0-b4b5-2d46fcc55c09.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-10_surface_variables.nc (900.8K)
2023-08-20 18:49:40,215 INFO Download rate 400.6K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-10_surface_variables.nc


2023-08-20 18:49:40,638 INFO Welcome to the CDS
2023-08-20 18:49:40,639 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:49:40,880 INFO Request is queued
2023-08-20 18:49:42,401 INFO Request is running
2023-08-20 18:54:01,878 INFO Request is completed
2023-08-20 18:54:01,878 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data1/adaptor.mars.external-1692579144.9184074-27003-18-19412f0b-627a-4d25-ac5b-380c24bdc2c6.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-10_pressure_variables.nc (516.4K)
2023-08-20 18:54:05,803 INFO Download rate 131.6K/s
2023-08-20 18:54:06,217 INFO Welcome to the CDS


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-10_pressure_variables.nc


2023-08-20 18:54:06,217 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 18:54:06,446 INFO Request is queued
2023-08-20 18:54:07,731 INFO Request is running
2023-08-20 18:58:27,793 INFO Request is completed
2023-08-20 18:58:27,794 INFO Downloading https://download-0017.copernicus-climate.eu/cache-compute-0017/cache/data6/adaptor.mars.internal-1692579484.7532084-8757-3-cabf2ab8-4091-4ca7-81c7-93d2d97906df.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-11_surface_variables.nc (871.8K)
2023-08-20 18:58:30,179 INFO Download rate 365.8K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-11_surface_variables.nc


2023-08-20 18:58:30,716 INFO Welcome to the CDS
2023-08-20 18:58:30,717 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 18:58:31,029 INFO Request is queued
2023-08-20 18:58:32,311 INFO Request is running
2023-08-20 19:01:25,831 INFO Request is completed
2023-08-20 19:01:25,832 INFO Downloading https://download-0000-clone.copernicus-climate.eu/cache-compute-0000/cache/data9/adaptor.mars.external-1692579675.0910523-536-8-4356a5f3-2c3d-49d8-9234-eefe9491f917.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-11_pressure_variables.nc (499.8K)
2023-08-20 19:01:27,898 INFO Download rate 242K/s  


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-11_pressure_variables.nc


2023-08-20 19:01:28,323 INFO Welcome to the CDS
2023-08-20 19:01:28,324 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2023-08-20 19:01:28,544 INFO Request is queued
2023-08-20 19:01:29,751 INFO Request is running
2023-08-20 19:07:50,036 INFO Request is completed
2023-08-20 19:07:50,036 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data8/adaptor.mars.internal-1692579940.3982298-5004-3-222874f1-0897-4d20-a02d-8cedd3fc6089.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-12_surface_variables.nc (900.8K)
2023-08-20 19:07:58,122 INFO Download rate 111.4K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-12_surface_variables.nc


2023-08-20 19:07:58,538 INFO Welcome to the CDS
2023-08-20 19:07:58,538 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2023-08-20 19:07:58,757 INFO Request is queued
2023-08-20 19:08:04,383 INFO Request is running
2023-08-20 19:12:19,709 INFO Request is completed
2023-08-20 19:12:19,711 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data9/adaptor.mars.external-1692580244.1355445-13334-1-79962571-50d2-4233-9bc2-9f99b51e2e8a.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-12_pressure_variables.nc (516.4K)
2023-08-20 19:12:23,763 INFO Download rate 127.5K/s


Successfully downloaded C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-12_pressure_variables.nc
    Downloads complete.


### Functions

In [38]:
from datetime import datetime, timedelta
import geopandas as gpd
import math
import cdsapi
import os

In [7]:
def prepare_forcing_outputs(df,i,data_path):
    
    '''Prepares output folders for lumped and distributed forcing downloads outcomes'''
    
    from pathlib import Path
    
    # Get identifiers
    country = df.iloc[i].Country
    basin_id = df.iloc[i].Station_id
    full_id = country + '_' + basin_id
    
    # Construct the paths
    main_folder = Path(data_path) / 'basin_data' / (country + '_' + basin_id) / 'forcing' 
    lump_folder = main_folder / 'lumped'
    dist_folder = main_folder / 'distributed'
    raw_folder  = main_folder / 'raw'
    
    # Make the paths
    lump_folder.mkdir(parents=True, exist_ok=True)
    dist_folder.mkdir(parents=True, exist_ok=True)
    raw_folder.mkdir(parents=True, exist_ok=True)
   
    return raw_folder, lump_folder, dist_folder

In [80]:
def download_era5_surface_level_data_to_netcdf(coordinates,date_s,date_e,file):
    
    '''Downloads specified ERA5 surface level parameters for a specified bounding box'''
    # Pressure level variables: 

    if not os.path.isfile(file):
        # Make sure the connection is re-tried if it fails
        retries_max = 1
        retries_cur = 1
        while retries_cur <= retries_max:
            try:
    
                # connect to Copernicus (requires .cdsapirc file in $HOME)
                c = cdsapi.Client()
            
                # specify and retrieve data
                c.retrieve('reanalysis-era5-single-levels', { # do not change this!
                           'product_type': 'reanalysis',
                           'format'      : 'netcdf',
                           'variable'    : [
                               'mean_surface_downward_long_wave_radiation_flux',
                               'mean_surface_net_long_wave_radiation_flux',
                               'mean_surface_downward_short_wave_radiation_flux',
                               'mean_surface_net_short_wave_radiation_flux',
                               'mean_total_precipitation_rate',
                               'surface_pressure',
                               'mean_potential_evaporation_rate',
                           ],
                           'date': f'{date_s}/{date_e}', # 'yyyy-mm-dd'
                           'time': '00/to/23/by/1',
                           'area': coordinates, # expected as [lat_max, lon_min, lat_min, lon_max], e.g. [51.75/-116.5/51.0/-115.5]
                           'grid': '0.25/0.25', # Latitude/longitude grid: east-west (longitude) and north-south resolution (latitude).
                    },
                    file) # file path and name

                # track progress
                print('Successfully downloaded ' + str(file))

            except:
                print('Error downloading ' + str(file) + ' on try ' + str(retries_cur))
                retries_cur += 1
                continue
            else:
                break
    return

In [81]:
def download_era5_pressure_level_data_to_netcdf(coordinates,date_s,date_e,file):
    
    '''Downloads specified ERA5 pressure level parameters for a specified bounding box'''
    # Pressure level variables: https://confluence.ecmwf.int/pages/viewpage.action?pageId=82870405#ERA5:datadocumentation-Table9
    # MARS requests: https://confluence.ecmwf.int/display/UDOC/HRES%3A+Atmospheric+%28oper%29%2C+Model+level+%28ml%29%2C+Forecast+%28fc%29%3A+Guidelines+to+write+efficient+MARS+requests
    
    if not os.path.isfile(file):
        # Make sure the connection is re-tried if it fails
        retries_max = 1
        retries_cur = 1
        while retries_cur <= retries_max:
            try:
    
                # connect to Copernicus (requires .cdsapirc file in $HOME)
                c = cdsapi.Client()
            
                # specify and retrieve data
                c.retrieve('reanalysis-era5-complete', {    # do not change this!
                           'class'   : 'ea',
                           'expver'  : '1',
                           'stream'  : 'oper',
                           'type'    : 'an',
                           'levtype' : 'ml',
                           'levelist': '137',
                           'param'   : '130/131/132/133', # i.e., Temperature, U and V wind, specific humidity
                           'date'    : f'{date_s}/to/{date_e}', # 'yyyy-mm-dd'
                           'time'    : '00/to/23/by/1', 
                           'area'    : coordinates, # expected as [lat_max, lon_min, lat_min, lon_max], e.g. [51.75/-116.5/51.0/-115.5]
                           'grid'    : '0.25/0.25', # Latitude/longitude grid: east-west (longitude) and north-south resolution (latitude).
                           'format'  : 'netcdf',
                    }, file)

                # track progress
                print('Successfully downloaded ' + str(file))

            except:
                print('Error downloading ' + str(file) + ' on try ' + str(retries_cur))
                retries_cur += 1
                continue
            else:
                break
    return

In [10]:
def download_era5_time_invariant_data_to_netcdf(coordinates,file):
    
    '''Downloads all ERA5 time-invariant parameters for a specified bounding box'''
    # Time-invariants: https://confluence.ecmwf.int/pages/viewpage.action?pageId=82870405#ERA5:datadocumentation-Table1

    if not os.path.isfile(file):
        # Make sure the connection is re-tried if it fails
        retries_max = 1
        retries_cur = 1
        while retries_cur <= retries_max:
            try:
    
                # connect to Copernicus (requires .cdsapirc file in $HOME)
                c = cdsapi.Client()
            
                # specify and retrieve data
                c.retrieve('reanalysis-era5-complete', {    # do not change this!
                           'stream' : 'oper',
                           'levtype': 'sf',
                           'param'  : '26/228007/27/28/29/30/43/74/129/160/161/162/163/172', # i.e., all time-invariant values
                           'date'   : '2023-01-01', # arbitrary date
                           'time'   : '00', # time-invariant data; no need to get more than a single time step
                           'area'   : coordinates, # expected as [lat_max, lon_min, lat_min, lon_max], e.g. [51.75/-116.5/51.0/-115.5]
                           'grid'   : '0.25/0.25', # Latitude/longitude grid: east-west (longitude) and north-south resolution (latitude).
                           'format' : 'netcdf',
                    }, file)

                # track progress
                print('Successfully downloaded ' + str(file))

            except:
                print('Error downloading ' + str(file) + ' on try ' + str(retries_cur))
                retries_cur += 1
                continue
            else:
                break
    return

In [79]:
def convert_start_and_end_dates_to_era5_download_lists(start,end):

    '''Takes two datetime.datetime(y,m,d,h,min) objects and returns two lists with start and end dates for ERA5 downloads at monthly intervals'''

    # Initiate the date we're current working with
    cur = start 
    
    # Initiate the lists
    start_l = []
    end_l = []
    
    # Loop over the dates, until we have the end date
    while cur < end:
        
        # Add to start list
        start_l.append(cur)
        
        # Figure out the index of the next month and if the year changes:
        tmp = cur + timedelta(days=31) # Add 31 days to current date to ensure we're in the next month, might also switch the year
        next_month = tmp.month #  Extract 'month' from this object
        next_year = tmp.year # Extract the year too. If we ticked over into a new year we need to track this, otherwise we never increment the year

        # Create the end-of-month date
        cur = cur.replace(year=next_year, month=next_month) - timedelta(days=1)
        
        # Ensure this does not step over our end date
        if cur >= end:
            cur = end

        # Add to end list
        end_l.append(cur)

        # Add 1 day to create the new start-of-month date
        cur = cur + timedelta(days=1)
        
    return start_l,end_l

In [11]:
def convert_to_date_only(time_string, original_format="%Y-%m-%d %H:%M:%S", target_format="%Y-%m-%d"):
    try:
        datetime_obj = datetime.strptime(time_string, original_format)
        date_only_string = datetime_obj.strftime(target_format)
        return date_only_string
    except ValueError:
        return None

In [12]:
def round_flow_obs_to_days(times):
    
    '''Takes two times ([time1,time2]) in 'YYYY-MM-DD hh:mm:ss' and rounds to days '''
    
    return [convert_to_date_only(times[0]),
            convert_to_date_only(times[1])]

In [13]:
def flow_obs_unavailable(df,country,station):
    
    '''Checks in the "unusable" dataframe if iv, dv or both are unavailable for a station'''
    
    missing = []
    for ix,row in df.iterrows():
        if row.Country != country:
            continue
        if row.Station_id == station:
            missing.append(row.Missing)
    
    return missing

In [14]:
def find_flow_obs_times_from_metadata(row,missing):
    
    '''Finds required data start and end times from flow observations in meta-data file'''
    
    # Start and end dates come as 'YYYY-MM-DD hh:mm:ss' strings so we can directly compare them
    # Source: https://stackoverflow.com/a/54987418
    
    # Shorthands
    iv_s = row.iv_flow_obs_availability_start
    iv_e = row.iv_flow_obs_availability_end
    dv_s = row.dv_flow_obs_availability_start
    dv_e = row.dv_flow_obs_availability_end
    
    # Missing data cases
    if ('iv' in missing) and ('dv' in missing):
        return []
    
    elif 'iv' in missing:
        times = [dv_s,dv_e]
        for time in times: 
            assert is_valid_date_format(time), f'{time} not in expected format'
        return [dv_s,dv_e]
    
    elif 'dv' in missing:
        times = [iv_s,iv_e]
        for time in times: 
            assert is_valid_date_format(time), f'{time} not in expected format'
        return [iv_s,iv_e]
    
    else:
        times = [iv_s,iv_e,dv_s,dv_e]
        for time in times: 
            assert is_valid_date_format(time), f'{time} not in expected format'
    return [min(iv_s,dv_s),max(iv_e,dv_e)]

In [15]:
def is_valid_date_format(date_string, date_format='%Y-%m-%d %H:%M:%S'):
    try:
        datetime.strptime(date_string, date_format)
        return True
    except ValueError:
        return False

In [16]:
# Find shapefile bounding box
def find_shapefile_bounds(path):
    
    # Modified from: https://github.com/CH-Earth/CWARHM/blob/main/0_tools/ERA5_find_download_coordinates_from_shapefile.ipynb
    shp = gpd.read_file(path)
    
    return shp.total_bounds

In [17]:
def find_download_coords_from_bounds(coords, target='ERA5'):
    
    '''
    Determines download coordinates from shapefile bounds for a given data set.
    Assumes coodinates are an array: [lon_min, lat_min, lon_max, lat_max] (bottom-left, top-right).
    Returns separate lat and lon vectors.
    '''

    # Source: https://github.com/CH-Earth/CWARHM/blob/main/3a_forcing/1a_download_forcing/download_ERA5_pressureLevel_annual.ipynb   
    
    # Extract values
    lon = [coords[0],coords[2]]
    lat = [coords[1],coords[3]]
    
    if target == 'ERA5':
        
        # Round to ERA5 0.25 degree resolution
        rounded_lon = [math.floor(lon[0]*4)/4, math.ceil(lon[1]*4)/4]
        rounded_lat = [math.floor(lat[0]*4)/4, math.ceil(lat[1]*4)/4]

        # Find if we are still in the representative area of a different ERA5 grid cell
        if lat[0] > rounded_lat[0]+0.125:
            rounded_lat[0] += 0.25
        if lon[0] > rounded_lon[0]+0.125:
            rounded_lon[0] += 0.25
        if lat[1] < rounded_lat[1]-0.125:
            rounded_lat[1] -= 0.25
        if lon[1] < rounded_lon[1]-0.125:
            rounded_lon[1] -= 0.25
    
        # Make a download string ready for ERA5 (cdsapi) format
        dl_string = '{}/{}/{}/{}'.format(rounded_lat[1],rounded_lon[0],rounded_lat[0],rounded_lon[1])
    
    return dl_string, rounded_lat, rounded_lon