# UTC to LST
We want all forcing data time stamps to be in local standard time (LST) instead of UTC, so that they match the streamflow observations.

In [1]:
import glob
import os
import sys
import netCDF4 as nc4
import pandas as pd
import shutil
import xarray as xr
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Relative location of the workflow's configuration file
config_file = '../0_config/config.txt'

In [3]:
# Pull every setting this notebook needs out of the config file in one pass.
# The keys are listed in the order the values are unpacked:
#   data_path        - root data directory
#   cs_meta_path     - folder holding the CAMELS-spat metadata files
#   cs_meta_name     - file name of the CAMELS-spat metadata table
#   cs_unusable_name - file name of the table listing unusable stations
#   cs_basin_folder  - folder holding the per-basin data
# NOTE(review): cs_meta_path and cs_basin_folder are both read from the
#  'cs_basin_path' key - confirm that the metadata files are meant to live
#  in the basin folder.
data_path, cs_meta_path, cs_meta_name, cs_unusable_name, cs_basin_folder = (
    cs.read_from_config(config_file, key)
    for key in (
        'data_path',
        'cs_basin_path',
        'cs_meta_name',
        'cs_unusable_name',
        'cs_basin_path',
    )
)

# Full path to the basin folder
basins_path = Path(data_path) / cs_basin_folder

### Data loading

In [4]:
# Resolve the metadata folder against the data root, then load both tables
cs_meta_path = Path(data_path, cs_meta_path)
cs_meta = pd.read_csv(cs_meta_path.joinpath(cs_meta_name))

# Station IDs are read as strings so that leading zeros are not lost
cs_unusable = pd.read_csv(
    cs_meta_path.joinpath(cs_unusable_name),
    dtype={'Station_id': object},
)

### Processing

In [5]:
debug_message = f'\n!!! CHECK DEBUGGING STATUS: \n- Testing 1 basin\n'

In [7]:
# Shift the time axis of every forcing file of each basin from UTC to the
# local standard time (LST) of the basin's streamflow observations. Files are
# modified in place.

# Global attribute written to a file once its time axis has been shifted. It
# stores the applied offset in hours and lets a re-run of this cell skip files
# that were already converted, instead of shifting them a second time.
# Files shifted before this marker existed do not carry it and cannot be
# recognised as already converted.
_LST_MARKER = 'utc_to_lst_offset_hours'

# Number of time-axis units in one hour, keyed by the first word of a
# "<unit> since <reference time>" units string.
_TIME_UNITS_PER_HOUR = {
    'second': 3600.0, 'seconds': 3600.0, 'sec': 3600.0, 'secs': 3600.0, 's': 3600.0,
    'minute': 60.0, 'minutes': 60.0, 'min': 60.0, 'mins': 60.0,
    'hour': 1.0, 'hours': 1.0, 'hr': 1.0, 'hrs': 1.0, 'h': 1.0,
    'day': 1.0 / 24.0, 'days': 1.0 / 24.0, 'd': 1.0 / 24.0,
}


def _offset_in_time_units(offset_hours, time_variable):
    """Express an offset given in hours in the units of a netCDF time variable.

    Parameters
    ----------
    offset_hours : float
        Offset to apply to the time axis, in hours (e.g. -4.0).
    time_variable : netCDF4.Variable
        The time variable whose values will be shifted.

    Returns
    -------
    float
        The offset converted to the variable's own time units. If the variable
        has no (or an empty) `units` attribute the offset is returned
        unchanged, i.e. the time axis is assumed to be in hours.

    Raises
    ------
    ValueError
        If the time unit is not recognised, or if the converted offset is not
        a whole number while the variable stores integers (the fractional part
        would be lost when the shifted values are written back).
    """
    unit_words = str(getattr(time_variable, 'units', '')).split()
    if not unit_words:
        return offset_hours  # no units given: assume hours

    unit_name = unit_words[0].lower()
    if unit_name not in _TIME_UNITS_PER_HOUR:
        raise ValueError(
            f"Cannot shift time axis: unsupported time unit '{unit_name}'"
        )

    shift = offset_hours * _TIME_UNITS_PER_HOUR[unit_name]
    stores_integers = getattr(time_variable.dtype, 'kind', '') in ('i', 'u')
    if stores_integers and not float(shift).is_integer():
        raise ValueError(
            f"Cannot shift integer time axis in '{unit_name}' by {offset_hours} hours "
            "without losing precision"
        )
    return shift


print(debug_message)
basin_root = Path(data_path) / cs_basin_folder
for ix, row in cs_meta.iterrows():

    # DEBUGGING
    if ix != 0:
        continue

    # Get forcing paths
    basin_id, _, _, _, _ = cs.prepare_delineation_outputs(cs_meta, ix, basin_root)
    raw_fold, lump_fold, dist_fold = cs.prepare_forcing_outputs(cs_meta, ix, basin_root)  # Returns folders only, not file names
    print('--- Now running basin {}. {}'.format(ix, basin_id))

    # Check if we need to process this station at all
    missing = cs.flow_obs_unavailable(cs_unusable, row.Country, row.Station_id)
    if 'iv' in missing and 'dv' in missing:
        continue  # with next station, because we have no observations at all for this station

    # Find the files: raw first, then lumped, then distributed, each group sorted by name
    all_files = [
        file
        for fold in (raw_fold, lump_fold, dist_fold)
        for file in sorted(glob.glob(str(fold / '*.nc')))
    ]

    # Find LST
    # We can simply use dv_flow_obs_timezone here because we already know the USA gauges
    #  show consistent LSTs for IV and DV observations, and for CAN we only have DV LST
    #  anyway.
    lst = row['dv_flow_obs_timezone']
    utc = cs.tz_abbreviation_to_utc(lst)  # e.g. 'UTC-04'
    offset = cs.relative_utc_to_float_offset_in_hours(utc)  # e.g. -4.0

    # Open each file in append mode with netCDF4 and shift its time values in place
    for file in all_files:
        print(f'Processing {file}')
        with nc4.Dataset(file, 'a') as f:

            # Never shift the same file twice
            if _LST_MARKER in f.ncattrs():
                print(f'  Skipping: time axis was already shifted by {f.getncattr(_LST_MARKER)} h')
                continue

            time_variable = f.variables['time']
            shift = _offset_in_time_units(offset, time_variable)  # validate before writing anything
            time_variable[:] = time_variable[:] + shift
            f.setncattr(_LST_MARKER, float(offset))
print(debug_message)


!!! CHECK DEBUGGING STATUS: 
- Testing 1 basin

--- Now running basin 0. CAN_01AD002
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/raw/EM_Earth_1950-01.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/raw/ERA5_1950-01.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/raw/ERA5_1950-02.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/raw/ERA5_1950-03.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/raw/ERA5_1950-04.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/raw/ERA5_2023-01-01_invariants.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/lumped/EM-Earth_lumped_remapped_1950-01-01-00-00-00.nc
Processing /Users/wmk934/data/CAMELS_spat/camels-spat-data/basin_data/CAN_01AD002/forcing/l