# Prepare data folders
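Creates a destination folder tree for the CAMELS-spat catchments. Where a reference shapefile is available for a catchment, it is also copied into that catchment's folder and the metadata file is updated accordingly.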

In [1]:
import sys
import pandas as pd
import geopandas as gpd
from pathlib import Path
# Put the parent of the current working directory on the module search path,
# so that the project-local module imported on the next line can be found there.
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Specify where the config file can be found.
# The path is relative, so it is resolved against the current working directory.
config_file = '../0_config/config.txt'

In [3]:
# Pull the settings this notebook needs out of the config file, in one pass.
# Note that `cs_meta_path` is filled from the 'cs_basin_path' config entry.
data_path, ref_shps_path, cs_meta_path, cs_meta_name = (
    cs.read_from_config(config_file, key)
    for key in ('data_path', 'ref_shps_path', 'cs_basin_path', 'cs_meta_name')
)

### Load the metadata and source shapefiles

In [4]:
# Load the CAMELS-spat metadata table.
# `cs_meta_path` is rebound here from the config value to the full folder path under `data_path`.
cs_meta_path = Path(data_path).joinpath(cs_meta_path)
cs_meta = pd.read_csv(cs_meta_path.joinpath(cs_meta_name))

In [5]:
# Positional indices of the three reference-shape columns in the metadata table.
# Each assert stops the notebook with an AssertionError if the column found at
# that position does not carry the expected name.
c_ref, c_src, c_area = 12, 13, 14
assert cs_meta.columns[c_ref] == 'Ref_shape'
assert cs_meta.columns[c_src] == 'Ref_shape_source'
assert cs_meta.columns[c_area] == 'Ref_shape_area_km2'

In [6]:
# Load the reference shapefiles from the reference folder under `data_path`.
ref_shps_path = Path(data_path).joinpath(ref_shps_path)

# CAMELS-US
ref_camels_us = gpd.read_file(ref_shps_path.joinpath('CAMELS-US', 'HCDN_nhru_final_671.shp'))

# WSC2016
ref_wsc2016 = gpd.read_file(ref_shps_path.joinpath('RHBN-CAN', 'WSC2016', 'WSC2016_basins.shp'))

# WSC2022 basins, pour points and stations.
# NOTE(review): all three reads below open the same 'WSC2022_basins.shp' file, although
# the variables are meant to hold basins, pour points and stations respectively.
# This looks like a copy-paste slip; confirm the intended pour point / station filenames.
ref_wsc2022_b = gpd.read_file(ref_shps_path.joinpath('RHBN-CAN', 'WSC2022', 'WSC2022_basins.shp'))
ref_wsc2022_p = gpd.read_file(ref_shps_path.joinpath('RHBN-CAN', 'WSC2022', 'WSC2022_basins.shp'))
ref_wsc2022_s = gpd.read_file(ref_shps_path.joinpath('RHBN-CAN', 'WSC2022', 'WSC2022_basins.shp'))

In [7]:
# Make a dedicated data folder.
# `cs_meta_path` was already joined onto `data_path` when the metadata file was loaded,
# so it is used directly here. Prefixing `data_path` a second time only works when
# `data_path` is absolute (pathlib then discards the left operand); with a relative
# `data_path` it would produce a duplicated prefix and create the folder in the wrong place.
cs_basin_path = cs_meta_path / 'basin_data'
cs_basin_path.mkdir(parents=True, exist_ok=True)

In [8]:
# For every basin in the CAMELS-spat metadata: create its folder, copy the reference
# shape if one exists, and record the outcome in the metadata table.
# enumerate() supplies the row *position* that `.iat` needs; the label returned by
# iterrows() only equals the position when the index is a default RangeIndex.
for pos, (_, row) in enumerate(cs_meta.iterrows()):

    country = row['Country']
    station_id = row['Station_id']

    # 1. Make destination folder
    dest = cs_basin_path / f'{country}_{station_id}' / 'shapefiles' / 'reference'
    dest.mkdir(parents=True, exist_ok=True)

    # 2. Look for a reference shape.
    # Reset per iteration so that a basin without a match (or with an unhandled country
    # code) can never pick up the shape, source or mask left over from the previous row.
    basin, src = None, None

    if country == 'USA':

        # ref_camels_us['hru_id'] is type Int. Make Str for matching.
        # NOTE(review): if Station_id carries leading zeros, a plain int-to-str
        # conversion of hru_id will not match it - confirm against the data.
        mask = ref_camels_us['hru_id'].astype('str') == station_id
        if mask.any():
            basin, src = cs.process_camels_us_ref_shape(ref_camels_us, mask) # Returns: basin shape in EPSG:4326

    elif country == 'CAN':

        # Prefer WSC2022; fall back to WSC2016 only if WSC2022 has nothing,
        # because WSC2022 is supposed to be more accurate than WSC2016.
        mask = ref_wsc2022_b['StationNum'] == station_id
        if mask.any():
            basin, src = cs.process_wsc2022_ref_shape(ref_wsc2022_b, mask) # Returns: basin shape in EPSG:4326
        else:
            mask = ref_wsc2016['Station'] == station_id
            if mask.any():
                basin, src = cs.process_wsc2016_ref_shape(ref_wsc2016, mask) # Returns: basin shape in EPSG:4326

    # 3. Update metadata file with reference shape info
    if basin is not None:
        basin.to_file(dest / f'reference_{station_id}.shp') # Save to file
        cs_meta.iat[pos, c_ref] = 'yes'
        cs_meta.iat[pos, c_src] = src
        # Store a scalar, not the whole column: basin['Area_km2'] is a Series.
        # Assumes `basin` holds the single matched catchment in its first row - TODO confirm.
        cs_meta.iat[pos, c_area] = basin['Area_km2'].iloc[0]
    else:
        cs_meta.iat[pos, c_ref] = 'no'
        # Reference source is already at NaN in the metadata file, so we can leave that
        # Reference area is already at -999 in the metadata file, so we can leave that

In [9]:
# Save the updated metadata file.
# index=False keeps the on-disk column layout identical to what was read: the file is
# loaded without an index column, so writing the index would add an extra leading
# column and shift the hard-coded column positions the next time this notebook runs.
cs_meta.to_csv(cs_meta_path / cs_meta_name, index=False)