# Convert Canada Reference Hydrometric Basin Network (RHBN) metadata file type
The metadata file is provided as `.xlsx`. We convert it to `.csv` for easier use with HYDAT downloads in R.

In [1]:
import sys
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs
import pandas as pd

### Config handling

In [2]:
# Specify where the config file can be found
# (relative path, so it is resolved against the current working directory)
config_file = '../0_config/config.txt'

In [3]:
# Read the three config entries used below, in this order:
# 'data_path', 'ref_shps_path' and 'can_rhbn_meta_url'
data_path, shps_path, file_url = (
    cs.read_from_config(config_file, key)
    for key in ('data_path', 'ref_shps_path', 'can_rhbn_meta_url')
)

### Define file location

In [4]:
# Folder that holds the RHBN files: <data_path>/<ref_shps_path>/RHBN-CAN
download_folder = Path(data_path, shps_path, 'RHBN-CAN')

In [24]:
# Derive the file name from the URL: keep the text after the final '/'
# and trim any surrounding whitespace
file_name_xlsx = file_url.rpartition('/')[-1].strip()

In [26]:
# Derive the CSV file name by swapping only the file extension.
# A plain str.replace('xlsx', 'csv') would also rewrite any 'xlsx' that occurs
# earlier in the name, and would leave the name unchanged when the extension
# does not match exactly (e.g. '.XLSX'), so the CSV output would end up being
# written over the Excel file.
file_name_csv = str(Path(file_name_xlsx).with_suffix('.csv'))

### Process file
File organization (when read as a pandas DataFrame):
- Headers: Long description of each column in English
- Row 0: Long description of each column in French
- Row 1: Column name in English (short name; used as the header of the `.csv`)
- Row 2: Column name in French
- Rows 3-1284: data

In [15]:
# Load the Excel workbook into a DataFrame (pandas reads the first sheet by default)
df = pd.read_excel(str(download_folder.joinpath(file_name_xlsx)))

In [16]:
# Swap the long English headers for the short English column names stored in row 1
# (row 1 is a Series indexed by the current headers, so it acts as an old -> new mapping)
df = df.rename(df.loc[1], axis='columns')

In [18]:
# Remove the three non-data rows at the top of the table:
#   row 0 - long column descriptions (French)
#   row 1 - short column names (English), which now serve as the headers
#   row 2 - short column names (French)
df = df.drop(index=[0, 1, 2])

In [22]:
# Reset the row index to avoid issues later: after dropping rows 0-2 the index
# would otherwise start at 3. drop=True discards the old index instead of
# inserting it as a new column.
df = df.reset_index(drop=True)

In [30]:
# Save the cleaned table as a UTF-8 encoded CSV in the same folder as the
# Excel file, leaving out the row index
df.to_csv(download_folder.joinpath(file_name_csv), index=False, encoding='utf-8')