### Add derived wind direction 
We added derived variables to the ERA5 files before the basin-averaging step, but cannot easily average wind direction with EASYMORE's current capabilities: this would require some form of circular averaging that is currently unavailable. Therefore we add the derived wind direction to all files (raw, subbasin, lumped) individually, using the `u` and `v` values in each. 

See: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398

In [8]:
import glob
import sys
import netCDF4 as nc4
import pandas as pd
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Specify where the config file can be found.
# The path is relative, so it is resolved against the directory the notebook is run from.
config_file = '../0_config/config.txt'

In [3]:
# Read every setting this notebook needs from the config file.
# Base data path; the basin and metadata folders are later resolved relative to it.
data_path = cs.read_from_config(config_file, 'data_path')

# CAMELS-spat metadata: folder, metadata file name and unusable-stations file name.
# NOTE(review): the metadata folder is read from the 'cs_basin_path' key, i.e. the
# same key that is used for the basin folder below - confirm this is intended.
cs_meta_path, cs_meta_name, cs_unusable_name = [
    cs.read_from_config(config_file, key)
    for key in ('cs_basin_path', 'cs_meta_name', 'cs_unusable_name')
]

# Basin folder, first as configured and then as a path below the base data path
cs_basin_folder = cs.read_from_config(config_file, 'cs_basin_path')
basins_path = Path(data_path).joinpath(cs_basin_folder)

### Data loading

In [4]:
# Turn the configured metadata folder into a path below the base data path,
# then load the CAMELS-spat metadata table from it.
cs_meta_path = Path(data_path, cs_meta_path)
cs_meta = pd.read_csv(cs_meta_path.joinpath(cs_meta_name))

In [5]:
# Load the list of unusable stations.
# Station IDs are read with dtype `object` so they stay strings and keep their leading 0's.
cs_unusable = pd.read_csv(
    cs_meta_path.joinpath(cs_unusable_name),
    dtype=dict(Station_id=object),
)

### Processing

In [6]:
# Reminder printed before and after the processing loop, which currently
# contains a debugging filter that limits the run to a single basin.
debug_message = (
    '\n!!! CHECK DEBUGGING STATUS: \n'
    '- Testing 1 basin\n'
)

In [9]:
# Show the debugging reminder before the run starts
print(debug_message)

for ix, row in cs_meta.iterrows():

    # DEBUGGING: while this filter is in place only the row with index 1 is processed
    if ix != 1:
        continue

    # Get the basin ID and the forcing folders for this basin.
    # prepare_forcing_outputs() returns folders only, not file names.
    basin_id, shp_lump_path, shp_dist_path, _, _ = cs.prepare_delineation_outputs(
        cs_meta, ix, Path(data_path) / cs_basin_folder
    )
    raw_fold, lump_fold, dist_fold = cs.prepare_forcing_outputs(
        cs_meta, ix, Path(data_path) / cs_basin_folder
    )
    print(f'--- Now running basin {ix}. {basin_id}')

    # Check if this station needs processing at all: when both 'iv' and 'dv' are
    # reported as missing we have no observations for it, so move on to the next one
    missing = cs.flow_obs_unavailable(cs_unusable, row.Country, row.Station_id)
    if all(obs_type in missing for obs_type in ('iv', 'dv')):
        continue

    # Find the raw, lumped and distributed ERA5 files (each list sorted by name)
    era5_merged_files, era5_lump_files, era5_dist_files = (
        sorted(glob.glob(str(folder / pattern)))
        for folder, pattern in (
            (raw_fold, 'ERA5_[0-9][0-9][0-9][0-9]-[0-9][0-9].nc'),
            (lump_fold, 'ERA5_lumped_*.nc'),
            (dist_fold, 'ERA5_dist_*.nc'),
        )
    )
    era5_all_files = [*era5_merged_files, *era5_lump_files, *era5_dist_files]

    # Open every file in read/write mode and add the derived wind direction to it
    for file in era5_all_files:
        print(f'Processing {file}')
        with nc4.Dataset(file, 'r+') as f:
            # Files with an 'hru' dimension need the 'dims' argument, which toggles a
            # switch away from the default behavior in make_nc_variable() that assumes
            # lat & lon dims exist; all other files use the default call
            wind_kwargs = {'dims': 'hru'} if 'hru' in f.dimensions else {}
            f = cs.derive_wind_direction(f, **wind_kwargs)

# Repeat the debugging reminder so it is also visible at the end of the output
print(debug_message)


!!! CHECK DEBUGGING STATUS: 
- Testing 1 basin

--- Now running basin 1. CAN_01AD003
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD003/forcing/raw/ERA5_1951-10.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD003/forcing/lumped/ERA5_lumped_remapped_1951-10-01-00-00-00.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD003/forcing/distributed/ERA5_dist_remapped_1951-10-01-00-00-00.nc

!!! CHECK DEBUGGING STATUS: 
- Testing 1 basin

