# Average forcing into HRUs
We have raw, gridded ERA5 and EM-Earth forcing data. Here we average those gridded data into one time series per Hydrologic Response Unit (HRU), for both the distributed and lumped catchment shapes.

Workflow, per catchment:
- Create forcing grid shapefiles
- Create a remapping csv file using 1 forcing file
- Remap all other forcing files using the remap file

In [1]:
import glob
import shutil
import sys
import pandas as pd
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

## Config handling

In [2]:
# Specify where the config file can be found
# This relative path is handed to `cs.read_from_config` for every setting lookup in this notebook
config_file = '../0_config/config.txt'

In [3]:
# Read every setting this notebook needs from the config file
data_path = cs.read_from_config(config_file, 'data_path')

# CAMELS-spat metadata: containing folder plus the two table file names
# NOTE(review): the metadata folder is looked up under the 'cs_basin_path' key, the same
# key used for the basin folder below - confirm this is intended
cs_meta_path, cs_meta_name, cs_unusable_name = (
    cs.read_from_config(config_file, key)
    for key in ('cs_basin_path', 'cs_meta_name', 'cs_unusable_name')
)

# Basin folder, and its full location under the data root
cs_basin_folder = cs.read_from_config(config_file, 'cs_basin_path')
basins_path = Path(data_path).joinpath(cs_basin_folder)

## Data loading

In [4]:
# Resolve the metadata folder against the data root, then load the CAMELS-spat metadata table
cs_meta_path = Path(data_path).joinpath(cs_meta_path)
cs_meta = pd.read_csv(cs_meta_path.joinpath(cs_meta_name))

In [5]:
# Load the table of unusable stations
# Station IDs are read as strings (object dtype) so that leading zeros are kept
cs_unusable = pd.read_csv(
    cs_meta_path.joinpath(cs_unusable_name),
    dtype={'Station_id': object},
)

## Processing

In [6]:
# Reminder text stating that the processing loop is restricted to a debugging subset of basins
# Plain string: there are no placeholders to interpolate, so no f-prefix is needed
debug_message = '\n!!! CHECK DEBUGGING STATUS: \n- Testing 2 basins\n'

In [7]:
# Grid cell spacing of each forcing product; passed as `grid_spacing` to the cell-padding remapping workflow
# NOTE(review): presumably in degrees latitude/longitude - confirm against the forcing files
eme_spacing = 0.10  # EM-Earth
era_spacing = 0.25  # ERA5

In [8]:
# Remap the gridded ERA5 and EM-Earth forcing onto the lumped and distributed basin shapes, one basin at a time.
# Per basin: build forcing-grid shapefiles from the first forcing file of each data set, remap the selected
# files with EASYMORE (switching to the cell-padding workflow when the files cannot be remapped as-is),
# and finally create a figure as a graphical check of the result.

# Make it visible in the cell output that the debugging restrictions below are active
print(debug_message)

for ix, row in cs_meta.iterrows():

    # DEBUGGING: only process the basins at index 0 and 9
    if ix not in (0, 9):
        continue

    # Check if we need to process this station at all
    missing = cs.flow_obs_unavailable(cs_unusable, row.Country, row.Station_id)
    if 'iv' in missing and 'dv' in missing:
        continue  # with next station, because we have no observations at all for this station

    # Get the basin shapefile paths and the forcing folders (folders only, not file names)
    basin_id, shp_lump_path, shp_dist_path, _, _ = cs.prepare_delineation_outputs(cs_meta, ix, basins_path)
    raw_fold, lump_fold, dist_fold = cs.prepare_forcing_outputs(cs_meta, ix, basins_path)
    shp_dist_path = Path(str(shp_dist_path).format('basin'))  # fill the placeholder in the distributed shapefile path
    print(f'--- Now running basin {ix}. {basin_id}')

    # Ensure we have the CRS set in these shapes, because EASYMORE needs this
    for shp in [shp_lump_path, shp_dist_path]:
        cs.add_crs_to_shapefile(shp)

    # Get the forcing files (sorted lists of file paths)
    # NOTE(review): [0:2] keeps only the first two files per data set - presumably a debugging restriction
    eme_merged_files = sorted(glob.glob(str(raw_fold / 'EM_Earth_[0-9][0-9][0-9][0-9]-[0-9][0-9].nc')))[0:2]
    era_merged_files = sorted(glob.glob(str(raw_fold / 'ERA5_[0-9][0-9][0-9][0-9]-[0-9][0-9].nc')))[0:2]
    era_invariant = glob.glob(str(raw_fold / 'ERA5_*_invariants.nc'))

    # Fail with a clear message, instead of a bare IndexError further down, if expected inputs are missing
    for label, found in [('ERA5', era_merged_files),
                         ('EM-Earth', eme_merged_files),
                         ('ERA5 invariants', era_invariant)]:
        if not found:
            raise FileNotFoundError(f'No {label} file(s) found in {raw_fold} for basin {basin_id}')

    # Make forcing shapefiles, using the first file of each data set as the grid template
    era_grid_shp, eme_grid_shp = cs.prepare_forcing_grid_shapefiles(row.Country, row.Station_id, basins_path)
    for infile, outfile in zip([era_merged_files[0], eme_merged_files[0]], [era_grid_shp, eme_grid_shp]):
        cs.make_forcing_grid_shapefile(infile, outfile)

    # Add geopotential to ERA5 forcing grid shapefile
    cs.add_geopotential_to_era5_grid(era_invariant[0], era_grid_shp)

    # Prepare for remapping
    esmr_temp = cs.prepare_easymore_temp_folder(row.Country, row.Station_id, basins_path)

    # Check if can do the remapping with EASYMORE with files as-is, and act accordingly
    era5_can_remap = cs.check_can_remap_as_is(era_merged_files[0])  # we can assume that if this applies to one file, it applies to all
    if era5_can_remap:
        print('Remapping ERA5')
        era_lump_esmr = cs.easymore_workflow('ERA5', 'lumped', esmr_temp, era_grid_shp, shp_lump_path, lump_fold, era_merged_files)
        era_dist_esmr = cs.easymore_workflow('ERA5', 'dist',   esmr_temp, era_grid_shp, shp_dist_path, dist_fold, era_merged_files)
    else:
        # Files are 1x1 (lat x lon), use the workflow that adds padding of empty cells around this so we can keep using EASYMORE
        era_lump_esmr = cs.easymore_workflow_with_cell_padding('ERA5', 'lumped', esmr_temp, era_grid_shp, shp_lump_path, lump_fold,
                                                               era_merged_files, grid_spacing=era_spacing)
        era_dist_esmr = cs.easymore_workflow_with_cell_padding('ERA5', 'dist',   esmr_temp, era_grid_shp, shp_dist_path, dist_fold,
                                                               era_merged_files, grid_spacing=era_spacing)

    # Repeat for EM-Earth: because EM-Earth has a smaller spacing than ERA5, it is possible that we can remap one but not the other, hence separate
    eme_can_remap = cs.check_can_remap_as_is(eme_merged_files[0])  # we can assume that if this applies to one file, it applies to all
    if eme_can_remap:
        print('Remapping EM-Earth')
        eme_lump_esmr = cs.easymore_workflow('EM-Earth', 'lumped', esmr_temp, eme_grid_shp, shp_lump_path, lump_fold, eme_merged_files)
        eme_dist_esmr = cs.easymore_workflow('EM-Earth', 'dist',   esmr_temp, eme_grid_shp, shp_dist_path, dist_fold, eme_merged_files)
    else:
        eme_lump_esmr = cs.easymore_workflow_with_cell_padding('EM-Earth', 'lumped', esmr_temp, eme_grid_shp, shp_lump_path, lump_fold,
                                                               eme_merged_files, grid_spacing=eme_spacing)
        eme_dist_esmr = cs.easymore_workflow_with_cell_padding('EM-Earth', 'dist',   esmr_temp, eme_grid_shp, shp_dist_path, dist_fold,
                                                               eme_merged_files, grid_spacing=eme_spacing)

    # Create a graphical check of what we just did; the figure is saved next to the EASYMORE temp folder
    fig_file = esmr_temp.parent / f'{row.Country}_{row.Station_id}_spatial_averaging.png'
    cs.era5_eme_easymore_plotting_loop([era_lump_esmr, era_dist_esmr, eme_lump_esmr, eme_dist_esmr], esmr_temp, fig_file)

    # Remove the EASYMORE temp folder
    # NOTE(review): cleanup is currently disabled, so the temp folder is kept after each basin - confirm before a full run
    #shutil.rmtree(esmr_temp)

--- Now running basin 0. CAN_01AD002
Remapping ERA5
EASYMORE version 1.0.0 is initiated.
EASYMORE is given multiple variables for remapping but only on format and fill value. EASYMORE repeats the format and fill value for all the variables in output files
EASYMORE will remap variable  msdwlwrf  from source file to variable  msdwlwrf  in remapped netCDF file
EASYMORE will remap variable  msnlwrf  from source file to variable  msnlwrf  in remapped netCDF file
EASYMORE will remap variable  msdwswrf  from source file to variable  msdwswrf  in remapped netCDF file
EASYMORE will remap variable  msnswrf  from source file to variable  msnswrf  in remapped netCDF file
EASYMORE will remap variable  mtpr  from source file to variable  mtpr  in remapped netCDF file
EASYMORE will remap variable  sp  from source file to variable  sp  in remapped netCDF file
EASYMORE will remap variable  mper  from source file to variable  mper  in remapped netCDF file
EASYMORE will remap variable  t  from source fil

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:43:16.272423
It took 1.020943 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-01.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\lumped/ERA5_lumped_remapped_1950-01-01-00-00-00.nc 
Started at date and time 2023-09-17 16:43:16.293473 
Ended at date and time 2023-09-17 16:44:03.853752 
It took 47.560279 seconds to finish the remapping of variable(s) 
---------------------
---------------------
remap file is provided; EASYMORE will use this file and skip creation of remapping file
EASYMORE will remap variable  msdwlwrf  from source file to variable  msdwlwrf  in remapped netCDF file
EASYMORE will remap variable  msnlwrf  from source file to variable  msnlwrf  in remapped netCDF fi

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:44:49.137798
It took 2.892258 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\ERA5_1950-01.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\distributed/ERA5_dist_remapped_1950-01-01-00-00-00.nc 
Started at date and time 2023-09-17 16:44:49.163747 
Ended at date and time 2023-09-17 16:45:38.461604 
It took 49.297857 seconds to finish the remapping of variable(s) 
---------------------
---------------------
remap file is provided; EASYMORE will use this file and skip creation of remapping file
EASYMORE will remap variable  msdwlwrf  from source file to variable  msdwlwrf  in remapped netCDF file
EASYMORE will remap variable  msnlwrf  from source file to variable  msnlwrf  in remapped netCDF

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:46:23.960201
It took 1.298406 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\EM_Earth_1950-01.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\lumped/EM-Earth_lumped_remapped_1950-01-01-00-00-00.nc 
Started at date and time 2023-09-17 16:46:23.979231 
Ended at date and time 2023-09-17 16:46:28.610683 
It took 4.631452 seconds to finish the remapping of variable(s) 
---------------------
---------------------
EASYMORE version 1.0.0 is initiated.
EASYMORE is given multiple variables for remapping but only on format and fill value. EASYMORE repeats the format and fill value for all the variables in output files
EASYMORE will remap variable  tmean  from source file to variable  tmean  in rem

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:46:32.088477
It took 3.471777 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\raw\EM_Earth_1950-01.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AD002\forcing\distributed/EM-Earth_dist_remapped_1950-01-01-00-00-00.nc 
Started at date and time 2023-09-17 16:46:32.109399 
Ended at date and time 2023-09-17 16:46:36.865394 
It took 4.755995 seconds to finish the remapping of variable(s) 
---------------------
---------------------
--- Now running basin 9. CAN_01AK006
EASYMORE version 1.0.0 is initiated.
EASYMORE is given multiple variables for remapping but only on format and fill value. EASYMORE repeats the format and fill value for all the variables in output files
EASYMORE will remap variable  msdwlwrf  

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:46:41.896429
It took 0.350825 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\TEMP_easymore\ERA5_1966-11.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\lumped/ERA5_lumped_remapped_1966-11-01-00-00-00.nc 
Started at date and time 2023-09-17 16:46:41.914770 
Ended at date and time 2023-09-17 16:47:26.577031 
It took 44.662261 seconds to finish the remapping of variable(s) 
---------------------
---------------------
remap file is provided; EASYMORE will use this file and skip creation of remapping file
EASYMORE will remap variable  msdwlwrf  from source file to variable  msdwlwrf  in remapped netCDF file
EASYMORE will remap variable  msnlwrf  from source file to variable  msnlwrf  in remapped

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:48:12.318005
It took 0.260957 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\TEMP_easymore\ERA5_1966-11.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\distributed/ERA5_dist_remapped_1966-11-01-00-00-00.nc 
Started at date and time 2023-09-17 16:48:12.328943 
Ended at date and time 2023-09-17 16:48:55.812765 
It took 43.483822 seconds to finish the remapping of variable(s) 
---------------------
---------------------
remap file is provided; EASYMORE will use this file and skip creation of remapping file
EASYMORE will remap variable  msdwlwrf  from source file to variable  msdwlwrf  in remapped netCDF file
EASYMORE will remap variable  msnlwrf  from source file to variable  msnlwrf  in remap

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:49:40.460707
It took 0.346612 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\TEMP_easymore\EM_Earth_1966-11.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\lumped/EM-Earth_lumped_remapped_1966-11-01-00-00-00.nc 
Started at date and time 2023-09-17 16:49:40.471547 
Ended at date and time 2023-09-17 16:49:44.879734 
It took 4.408187 seconds to finish the remapping of variable(s) 
---------------------
---------------------
remap file is provided; EASYMORE will use this file and skip creation of remapping file
EASYMORE will remap variable  tmean  from source file to variable  tmean  in remapped netCDF file
EASYMORE will remap variable  prcp  from source file to variable  prcp  in remapped netC

  shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files


Ended at date and time 2023-09-17 16:49:49.782961
It took 0.251132 seconds to finish creating of the remapping file
---------------------------
------REMAPPING------
netcdf output file will be compressed at level 4
Removing existing remapped .nc file.
Remapping C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\TEMP_easymore\EM_Earth_1966-11.nc to C:\Globus endpoint\CAMELS_spat\camels-spat-data\basin_data\CAN_01AK006\forcing\distributed/EM-Earth_dist_remapped_1966-11-01-00-00-00.nc 
Started at date and time 2023-09-17 16:49:49.793950 
Ended at date and time 2023-09-17 16:49:54.111983 
It took 4.318033 seconds to finish the remapping of variable(s) 
---------------------
---------------------
remap file is provided; EASYMORE will use this file and skip creation of remapping file
EASYMORE will remap variable  tmean  from source file to variable  tmean  in remapped netCDF file
EASYMORE will remap variable  prcp  from source file to variable  prcp  in remapped n