# Prepare CAMELS-spat metadata file

In [27]:
import sys
import pandas as pd
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [28]:
# Specify where the config file can be found.
# This is a relative path, so it is resolved against the current working directory.
config_file = '../0_config/config.txt'

In [29]:
# Pull every setting this notebook needs out of the config file
def _cfg(key):
    """Return the value stored under `key` in `config_file`."""
    return cs.read_from_config(config_file, key)

# Folder layout and output file name
data_path    = _cfg('data_path')
shps_path    = _cfg('ref_shps_path')
cs_meta_path = _cfg('cs_basin_path')
cs_meta_name = _cfg('cs_meta_name')

# Source URLs; only their file names are used further down
ref_can_rhbn_file = _cfg('can_rhbn_meta_url')
ref_can_hydt_file = _cfg('can_hydat_db_url')
ref_usa_topo_file = _cfg('us_camels_topo_url')
ref_usa_name_file = _cfg('us_camels_name_url')

### Load the metadata files

In [30]:
# Work out where the RHBN and HYDAT metadata tables are stored locally
rhbn_folder = Path(data_path) / shps_path / 'RHBN-CAN'

# RHBN: take the file name from the URL and swap 'xlsx' for 'csv'
# NOTE(review): replace() acts on every occurrence of 'xlsx' in the name, not only the extension
ref_can_rhbn_name = ref_can_rhbn_file.split('/')[-1]
ref_can_rhbn_name = ref_can_rhbn_name.strip().replace('xlsx','csv') # file name
ref_can_rhbn_data = rhbn_folder / ref_can_rhbn_name # full path to file

# HYDAT: take the file name from the URL and swap '.zip' for the subset suffix
ref_can_hydt_name = ref_can_hydt_file.split('/')[-1]
ref_can_hydt_name = ref_can_hydt_name.strip().replace('.zip','_RHBN_2020_subset.csv') # file name
ref_can_hydt_data = rhbn_folder / ref_can_hydt_name # full path to file

In [31]:
# Work out where the CAMELS-US metadata tables are stored locally
camels_folder = Path(data_path) / shps_path / 'CAMELS-US'

# Topography attributes: take the file name from the URL and swap 'txt' for 'csv'
# NOTE(review): replace() acts on every occurrence of 'txt' in the name, not only the extension
ref_usa_topo_name = ref_usa_topo_file.split('/')[-1]
ref_usa_topo_name = ref_usa_topo_name.strip().replace('txt','csv')
ref_usa_topo_meta = camels_folder / ref_usa_topo_name

# Gauge names: same treatment
ref_usa_name_name = ref_usa_name_file.split('/')[-1]
ref_usa_name_name = ref_usa_name_name.strip().replace('txt','csv')
ref_usa_name_meta = camels_folder / ref_usa_name_name

In [32]:
# Load the metadata files
# Canadian tables: RHBN station list and the matching HYDAT subset
rhbn_data = pd.read_csv(str(ref_can_rhbn_data))
hydt_data = pd.read_csv(str(ref_can_hydt_data))

# CAMELS-US tables: force the gauge IDs to be read as strings. Left to type inference,
# pandas turns them into integers, which drops the leading zeros of USGS gauge IDs
# (e.g. '01013500' would become 1013500) and writes truncated IDs to the metadata file.
# Both files use the same dtype so the later merge on 'gauge_id' keeps matching.
cmls_topo = pd.read_csv(str(ref_usa_topo_meta), dtype={'gauge_id': str})
cmls_name = pd.read_csv(str(ref_usa_name_meta), dtype={'gauge_id': str})

In [33]:
# Combine the RHBN station list with the HYDAT attributes.
# Inner join: only stations present in both tables are kept.
rhbn_meta = pd.merge(rhbn_data, hydt_data, how='inner', on='STATION_NUMBER')

In [34]:
# Combine the CAMELS-US gauge names with the topography attributes.
# Inner join: only gauges present in both tables are kept.
cmls_meta = pd.merge(cmls_name, cmls_topo, how='inner', on='gauge_id')

### Make data folder

In [35]:
# Turn the configured sub-folder into a full path under the data directory,
# then create it (including any missing parents) unless it already exists
cs_meta_path = Path(data_path, cs_meta_path)
cs_meta_path.mkdir(exist_ok=True, parents=True)

### Make the csv

In [36]:
# Combine the RHBN (Canada) and CAMELS-US (USA) metadata into one table.
# The two sources use different column names, so each one is mapped row by row onto
# a shared layout. Not super elegant, but it's a one-off thing.
# Rows are collected in plain lists and converted to a dataframe in one go, because
# appending to lists is much quicker than appending to a dataframe
# (https://stackoverflow.com/a/56746204)

# Column layout of the combined metadata table
cs_meta_columns = ['Country',
                   'Station_id' , 'Station_name',
                   'Station_lat', 'Station_lon', 'Station_source',
                   'Outlet_lat' , 'Outlet_lon', 'Outlet_source',
                   'Basin_area_km2',
                   'Ref_area_1_src', 'Ref_area_1_km2',
                   'Ref_area_2_src', 'Ref_area_2_km2',
                   'Ref_shape', 'Ref_shape_source', 'Ref_shape_area_km2']

# Placeholders for fields that are filled in later; identical for both sources
outlet_todo = [-999, -999, 'n/a']    # Outlet lat, outlet lon, outlet source
basin_todo  = [-999]                 # CAMELS-spat basin area
shape_todo  = ['n/a', 'n/a', -999]   # Do we have a ref shape?, ref shape source, ref shape area

# Canada: RHBN stations with their HYDAT attributes
can_rows = [
    ['CAN',                                                      # Country
     stn['STATION_NUMBER'], stn['STATION_NAME'],                 # Station ID, station name
     stn['LATITUDE'], stn['LONGITUDE'], 'HYDAT',                 # Station lat, station lon, station source
     *outlet_todo,
     *basin_todo,
     'HYDAT gross drainage area', stn['DRAINAGE_AREA_GROSS'],    # Reference area 1 source, reference area 1 km2
     'HYDAT effective drainage area', stn['DRAINAGE_AREA_EFFECT'], # Reference area 2 source, reference area 2 km2
     *shape_todo]
    for _, stn in rhbn_meta.iterrows()
]

# USA: CAMELS-US gauges
usa_rows = [
    ['USA',                                                      # Country
     stn['gauge_id'], stn['gauge_name'],                         # Station ID, station name
     stn['gauge_lat'], stn['gauge_lon'], 'CAMELS-US',            # Station lat, station lon, station source
     *outlet_todo,
     *basin_todo,
     'GAGES II area', stn['area_gages2'],                        # Reference area 1 source, reference area 1 km2
     'Geospatial fabric area', stn['area_geospa_fabric'],        # Reference area 2 source, reference area 2 km2
     *shape_todo]
    for _, stn in cmls_meta.iterrows()
]

# Canadian rows first, then the USA rows
data = can_rows + usa_rows

# Convert to dataframe
cs_meta = pd.DataFrame(data, columns=cs_meta_columns)

In [37]:
# Show the combined table as a quick visual check (the display is truncated to the first and last rows)
cs_meta

Unnamed: 0,Country,Station_id,Station_name,Station_lat,Station_lon,Station_source,Outlet_lat,Outlet_lon,Outlet_source,Basin_area_km2,Ref_area_1_src,Ref_area_1_km2,Ref_area_2_src,Ref_area_2_km2,Ref_shape,Ref_shape_source,Ref_shape_area_km2
0,CAN,01AD002,SAINT JOHN RIVER AT FORT KENT,47.258060,-68.595833,HYDAT,-999,-999,,-999,HYDAT gross drainage area,14700.00,HYDAT effective drainage area,,,,-999
1,CAN,01AD003,ST. FRANCIS RIVER AT OUTLET OF GLASIER LAKE,47.206612,-68.956940,HYDAT,-999,-999,,-999,HYDAT gross drainage area,1350.00,HYDAT effective drainage area,,,,-999
2,CAN,01AE001,FISH RIVER NEAR FORT KENT,47.237499,-68.582779,HYDAT,-999,-999,,-999,HYDAT gross drainage area,2260.00,HYDAT effective drainage area,,,,-999
3,CAN,01AF007,GRANDE RIVIERE AT VIOLETTE BRIDGE,47.246971,-67.921280,HYDAT,-999,-999,,-999,HYDAT gross drainage area,339.00,HYDAT effective drainage area,,,,-999
4,CAN,01AF009,IROQUOIS RIVER AT MOULIN MORNEAULT,47.457829,-68.356827,HYDAT,-999,-999,,-999,HYDAT gross drainage area,182.00,HYDAT effective drainage area,,,,-999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1693,USA,14309500,"WEST FORK COW CREEK NEAR GLENDALE, OR",42.804000,-123.610910,CAMELS-US,-999,-999,,-999,GAGES II area,224.92,Geospatial fabric area,226.31,,,-999
1694,USA,14316700,"STEAMBOAT CREEK NEAR GLIDE, OR",43.349840,-122.728940,CAMELS-US,-999,-999,,-999,GAGES II area,587.90,Geospatial fabric area,588.01,,,-999
1695,USA,14325000,"SOUTH FORK COQUILLE RIVER AT POWERS, OR",42.891500,-124.070650,CAMELS-US,-999,-999,,-999,GAGES II area,443.07,Geospatial fabric area,444.92,,,-999
1696,USA,14362250,"STAR GULCH NEAR RUCH, OR.",42.154010,-123.075320,CAMELS-US,-999,-999,,-999,GAGES II area,41.42,Geospatial fabric area,43.88,,,-999


In [38]:
# Write the combined metadata table to disk as UTF-8 CSV, without the dataframe index
cs_meta_file = cs_meta_path / cs_meta_name
cs_meta.to_csv(cs_meta_file, index=False, encoding='utf-8')