# Merge Water Survey of Canada 2022 shapefiles
The source data contains different shapefiles provided per basin. Here we combine the shapefiles for different basins into a single shapefile. We repeat this for the three different pieces of geospatial information the WSC 2022 database provides:
- `[station]_DrainageBasin_BassinDeDrainage.shp`: basin outline
- `[station]_PourPoint_PointExutoire.shp`: pour point used for basin delineation (typically in the river)
- `[station]_Station.shp`: station location (typically on the river bank)

In [13]:
import os
import sys
import shutil
import pandas as pd
import geopandas as gpd
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Specify where the config file can be found.
# This is a relative path, so it is resolved against the current working directory.
config_file = '../0_config/config.txt'

In [7]:
# Pull the settings this notebook needs out of the config file.
# `cs.read_from_config` returns a pair; only its first element is used here.
data_path, _ = cs.read_from_config(config_file, 'data_path')          # root data directory
shps_path, _ = cs.read_from_config(config_file, 'ref_shps_path')      # shapefile sub-path under the data root
rhbn_meta, _ = cs.read_from_config(config_file, 'can_rhbn_meta_url')  # URL of the RHBN metadata file

### Define file folders

In [5]:
# Folder that holds the per-station WSC 2022 source shapefiles
# (removed again at the end of this notebook)
data_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN', 'WSC2022', 'temp')

In [21]:
# Folder the merged shapefiles are written to
merged_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN', 'WSC2022')

### Define where the metadata file is
We don't need this for the merging itself, but it will let us check how many of the RHBN shapes we have.

In [6]:
# Folder in which the RHBN metadata file is looked up
meta_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN')

In [8]:
# Build the metadata file name from the URL:
# take everything after the last '/', trim surrounding whitespace,
# and swap 'xlsx' for 'csv' (note: this replaces every occurrence of 'xlsx' in the name)
url_tail = rhbn_meta.rsplit('/', 1)[-1]
meta_name = url_tail.strip().replace('xlsx', 'csv')

In [11]:
# Load the RHBN 2020 list from its CSV file
meta_file = meta_folder / meta_name
df = pd.read_csv(str(meta_file))

### Merge the WSC 2022 shapes

In [14]:
# Collect the names of the sub-folders directly inside the data folder;
# each sub-folder name is taken to be a station for which WSC 2022 shapefiles exist
stations = []
for entry in os.scandir(data_folder):
    if entry.is_dir():
        stations.append(entry.name)

In [17]:
# Get a list of all existing shapefiles in the WSC 2022 data folder,
# split by the kind of geospatial information they contain
basin_files = []      # *_DrainageBasin_BassinDeDrainage.shp: basin outlines
pourpoint_files = []  # *_PourPoint_PointExutoire.shp: pour points
station_files = []    # *_Station.shp: station locations
for root,dirs,all_files in os.walk(data_folder):
    for file in all_files:
        if file.endswith('DrainageBasin_BassinDeDrainage.shp'):
            basin_files.append(os.path.join(root,file))
        elif file.endswith('PourPoint_PointExutoire.shp'):
            pourpoint_files.append(os.path.join(root,file))
        elif file.endswith('Station.shp'):
            station_files.append(os.path.join(root,file))

# os.walk returns directory entries in arbitrary (filesystem-dependent) order.
# Sort the paths so the row order of the merged shapefiles is reproducible.
basin_files.sort()
pourpoint_files.sort()
station_files.sort()

In [19]:
# Merge the individual files into aggregated shapes.
# ignore_index=True gives each merged frame a fresh 0..N-1 index; without it every
# source file contributes its own index starting at 0, leaving duplicated labels.
# The result is piped through gpd.GeoDataFrame so that it is a GeoDataFrame
# regardless of what pd.concat returns.
gdf_basins     = pd.concat([ gpd.read_file(file) for file in basin_files     ], ignore_index=True).pipe(gpd.GeoDataFrame)
gdf_pourpoints = pd.concat([ gpd.read_file(file) for file in pourpoint_files ], ignore_index=True).pipe(gpd.GeoDataFrame)
gdf_stations   = pd.concat([ gpd.read_file(file) for file in station_files   ], ignore_index=True).pipe(gpd.GeoDataFrame)

In [22]:
# Write each merged GeoDataFrame to its own shapefile in the output folder
merged_outputs = (
    (gdf_basins,     'WSC2022_basins.shp'),
    (gdf_pourpoints, 'WSC2022_pourpoints.shp'),
    (gdf_stations,   'WSC2022_stations.shp'),
)
for merged_gdf, out_name in merged_outputs:
    merged_gdf.to_file(merged_folder / out_name)

In [23]:
# Report how many basins we have shapefiles for.
# This compares row counts only: rows in the merged basin file vs. rows in the RHBN list.
n_shapes = len(gdf_basins)
n_reference = len(df)
print(f'Found {n_shapes} shapefiles matching one of {n_reference} Reference Hydrometric Basins')

Found 1008 shapefiles matching one of 1027 Reference Hydrometric Basins


### Remove temporary files

In [24]:
# Delete the temporary folder with the per-station source shapefiles, including everything in it.
# This is irreversible: only run it after the merged files above have been written.
shutil.rmtree(data_folder)